Write an implementation of the lexer in Rust (#7132)

Co-authored-by: Alan Cruickshank <alanmcruickshank@gmail.com>
Authored by: Cameron
Date: 2025-10-23 17:49:35 -04:00
Committed by: GitHub
parent b1a9d8a436
commit 6124c61a7c
108 changed files with 41538 additions and 43 deletions

View File

@@ -25,12 +25,15 @@ on:
required: false
type: boolean
default: false
with-rust:
required: true
type: string
secrets:
gh_token:
required: true
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.python-version }}-${{ inputs.marks }}-${{ inputs.coverage }}
group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.python-version }}-${{ inputs.marks }}-${{ inputs.coverage }}-${{ inputs.with-rust }}
cancel-in-progress: true
jobs:
@@ -49,6 +52,14 @@ jobs:
setup.cfg
requirements_dev.txt
- name: Download built wheels
if: ${{ inputs.with-rust == '-rust' }}
uses: actions/download-artifact@v4
with:
path: ./dist
pattern: wheels-*
merge-multiple: true
- name: Install dependencies
run: pip install tox
@@ -69,10 +80,10 @@ jobs:
# NOTE: We have a separate job for coverage reporting because
# it impacts performance and slows the test suite significantly.
if: ${{ inputs.coverage }}
run: tox -e py${{ steps.py_version.outputs.PYVERSION }} -- --cov=sqlfluff -n 2 test -m "${{ inputs.marks }}" --durations=16 --verbosity=0
run: tox -e py${{ steps.py_version.outputs.PYVERSION }}${{ inputs.with-rust }} -- --cov=sqlfluff -n 2 test -m "${{ inputs.marks }}" --durations=16 --verbosity=0
- name: Run the tests (without coverage)
if: ${{ !inputs.coverage }}
run: tox -e py${{ steps.py_version.outputs.PYVERSION }} -- -n 2 test -m "${{ inputs.marks }}" --durations=16 --verbosity=0
run: tox -e py${{ steps.py_version.outputs.PYVERSION }}${{ inputs.with-rust }} -- -n 2 test -m "${{ inputs.marks }}" --durations=16 --verbosity=0
- name: Rename coverage files with suffix
# NOTE: We do this because we're using the same tox environment for multiple
@@ -88,7 +99,7 @@ jobs:
uses: actions/upload-artifact@v4
if: ${{ inputs.coverage }}
with:
name: coverage-data-py${{ inputs.python-version }}-${{ inputs.marks }}
name: coverage-data-py${{ inputs.python-version }}-${{ inputs.marks }}${{ inputs.with-rust }}
path: ".coverage.*"
if-no-files-found: ignore
include-hidden-files: true

View File

@@ -45,6 +45,7 @@ jobs:
"mypy",
"mypyc",
"doctests",
"check-rs",
]
include:
# Default to most recent python version
@@ -64,9 +65,144 @@ jobs:
- name: Run the tests
run: tox -e ${{ matrix.job }}
rs-build-linux:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: ubuntu-latest
target: x86_64
- runner: ubuntu-latest
target: x86
- runner: ubuntu-latest
target: aarch64
- runner: ubuntu-latest
target: armv7
- runner: ubuntu-latest
target: s390x
- runner: ubuntu-latest
target: ppc64le
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
manylinux: auto
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-linux-${{ matrix.platform.target }}
path: dist
rs-build-musllinux:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: ubuntu-latest
target: x86_64
- runner: ubuntu-latest
target: x86
- runner: ubuntu-latest
target: aarch64
- runner: ubuntu-latest
target: armv7
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
manylinux: musllinux_1_2
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-musllinux-${{ matrix.platform.target }}
path: dist
rs-build-windows:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: windows-latest
target: x64
- runner: windows-latest
target: x86
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
architecture: ${{ matrix.platform.target }}
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-windows-${{ matrix.platform.target }}
path: dist
rs-build-macos:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: macos-13
target: x86_64
- runner: macos-14
target: aarch64
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-macos-${{ matrix.platform.target }}
path: dist
rs-build-sdist:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Build sdist
uses: PyO3/maturin-action@v1
with:
command: sdist
args: --out dist --manifest-path sqlfluffrs/Cargo.toml
- name: Upload sdist
uses: actions/upload-artifact@v4
with:
name: wheels-sdist
path: dist
# Test with coverage tracking on most recent python (py313).
python-version-tests:
name: Python Tests
needs: rs-build-linux
strategy:
matrix:
python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
@@ -77,6 +213,7 @@ jobs:
# Override coverage to be true for most recent python version.
- python-version: "3.13"
coverage: true
with-rust: [ "-rust", "" ]
permissions:
contents: read
pull-requests: write
@@ -84,6 +221,7 @@ jobs:
with:
python-version: ${{ matrix.python-version }}
coverage: ${{ matrix.coverage }}
with-rust: ${{ matrix.with-rust }}
secrets:
gh_token: ${{ secrets.github_token }}
@@ -114,9 +252,12 @@ jobs:
gh_token: ${{ secrets.github_token }}
dialect-tests:
name: Dialect ${{ matrix.marks }}
name: Dialect ${{ matrix.marks }}${{ matrix.with-rust }}
needs: rs-build-linux
strategy:
matrix:
marks: [ "parse_suite", "fix_suite", "rules_suite" ]
with-rust: [ "-rust", "" ]
include:
# This runs the bulk of the dialect _parsing_ tests.
#
@@ -149,6 +290,7 @@ jobs:
python-version: "3.13"
marks: ${{ matrix.marks }}
coverage: ${{ matrix.coverage }}
with-rust: ${{ matrix.with-rust }}
secrets:
gh_token: ${{ secrets.github_token }}

View File

@@ -0,0 +1,172 @@
name: Publish SQLFluff-rs PyPI Version
on:
release:
types:
- published
workflow_dispatch:
permissions:
contents: read
jobs:
linux:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: ubuntu-22.04
target: x86_64
- runner: ubuntu-22.04
target: x86
- runner: ubuntu-22.04
target: aarch64
- runner: ubuntu-22.04
target: armv7
- runner: ubuntu-22.04
target: s390x
- runner: ubuntu-22.04
target: ppc64le
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
manylinux: auto
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-linux-${{ matrix.platform.target }}
path: dist
musllinux:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: ubuntu-22.04
target: x86_64
- runner: ubuntu-22.04
target: x86
- runner: ubuntu-22.04
target: aarch64
- runner: ubuntu-22.04
target: armv7
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
manylinux: musllinux_1_2
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-musllinux-${{ matrix.platform.target }}
path: dist
windows:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: windows-latest
target: x64
- runner: windows-latest
target: x86
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
architecture: ${{ matrix.platform.target }}
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-windows-${{ matrix.platform.target }}
path: dist
macos:
runs-on: ${{ matrix.platform.runner }}
strategy:
matrix:
platform:
- runner: macos-13
target: x86_64
- runner: macos-14
target: aarch64
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: 3.x
- name: Build wheels
uses: PyO3/maturin-action@v1
with:
target: ${{ matrix.platform.target }}
args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
- name: Upload wheels
uses: actions/upload-artifact@v4
with:
name: wheels-macos-${{ matrix.platform.target }}
path: dist
sdist:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Build sdist
uses: PyO3/maturin-action@v1
with:
command: sdist
args: --out dist --manifest-path sqlfluffrs/Cargo.toml
- name: Upload sdist
uses: actions/upload-artifact@v4
with:
name: wheels-sdist
path: dist
release:
name: Release
runs-on: ubuntu-latest
if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }}
needs: [linux, musllinux, windows, macos, sdist]
permissions:
# Use to sign the release artifacts
id-token: write
# Used to upload release artifacts
contents: write
# Used to generate artifact attestation
attestations: write
steps:
- uses: actions/download-artifact@v4
- name: Generate artifact attestation
uses: actions/attest-build-provenance@v2
with:
subject-path: 'wheels-*/*'
- name: Publish to PyPI
if: ${{ startsWith(github.ref, 'refs/tags/') }}
uses: PyO3/maturin-action@v1
env:
MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
with:
command: upload
args: --non-interactive --skip-existing wheels-*/*

View File

@@ -36,7 +36,7 @@ repos:
hooks:
- id: black
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.14.1
rev: v1.18.1
hooks:
- id: mypy
additional_dependencies:
@@ -58,7 +58,7 @@ repos:
pathspec,
pytest, # and by extension... pluggy
click,
platformdirs
platformdirs,
]
files: ^src/sqlfluff/.*
# The mypy pre-commit hook by default sets a few arguments that we don't normally

View File

@@ -224,6 +224,10 @@ for development, and which parts of the test suite you may find most useful.
runs to specific dialects to further improve iteration speed. e.g.
- `tox -e generate-fixture-yml -- -d mysql` will run just the mysql tests.
- `python test/generate_parse_fixture_yml.py -d mysql` will do the same.
As you make changes to a dialect, you will also need to regenerate the Rust
dialects to keep them in sync. To do this, run `tox -e generate-rs` (if using
tox) or, with sqlfluff installed in a virtual environment, run
`utils/rustify.py build` to resync the dialects.
2. Developing for the dbt templater should only require running the dbt test
suite (see below).
3. Developing rules and rule plugins there are a couple of scenarios.

View File

@@ -7,12 +7,10 @@ build-backend = "setuptools.build_meta"
name = "sqlfluff"
version = "3.5.0"
description = "The SQL Linter for Humans"
readme = {file = "README.md", content-type = "text/markdown"}
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.9"
authors = [
{name = "Alan Cruickshank", email = "alan@designingoverload.com"},
]
license = {file = "LICENSE.md"}
authors = [{ name = "Alan Cruickshank", email = "alan@designingoverload.com" }]
license = { file = "LICENSE.md" }
classifiers = [
"Development Status :: 5 - Production/Stable",
"Environment :: Console",
@@ -99,6 +97,9 @@ dependencies = [
"tqdm",
]
[project.optional-dependencies]
rs = ["sqlfluffrs~=0.1.0"]
[project.urls]
Homepage = "https://www.sqlfluff.com"
Documentation = "https://docs.sqlfluff.com"
@@ -148,9 +149,7 @@ root_package = "sqlfluff"
[[tool.importlinter.contracts]]
name = "Forbid dependencies outside core"
type = "forbidden"
source_modules = [
"sqlfluff.core",
]
source_modules = ["sqlfluff.core"]
forbidden_modules = [
"sqlfluff.api",
"sqlfluff.cli",
@@ -162,12 +161,8 @@ forbidden_modules = [
[[tool.importlinter.contracts]]
name = "API may not depend on CLI"
type = "forbidden"
source_modules = [
"sqlfluff.api",
]
forbidden_modules = [
"sqlfluff.cli",
]
source_modules = ["sqlfluff.api"]
forbidden_modules = ["sqlfluff.cli"]
[[tool.importlinter.contracts]]
name = "Helper methods must be internally independent"
@@ -222,6 +217,7 @@ warn_unused_ignores = true
strict_equality = true
extra_checks = true
no_implicit_reexport = true
mypy_path = "$MYPY_CONFIG_FILE_DIR/sqlfluffrs"
# skip type checking for 3rd party packages for which stubs are not available
[[tool.mypy.overrides]]
@@ -232,7 +228,6 @@ ignore_missing_imports = true
module = "tblib.*"
ignore_missing_imports = true
[tool.ruff.lint]
extend-select = ["I", "D"]
@@ -280,7 +275,7 @@ ignore-path = "docs/source/_partials/"
skip = "*/test/fixtures/*,*/.*,*/pyproject.toml"
check-hidden = true
quiet-level=2
quiet-level = 2
# ignore-regex = '\\[fnrstv]'
builtin = "clear,rare,informal,names"
@@ -288,7 +283,7 @@ ignore-words-list = "fo,ws,falsy,coo,inout,deque,crate,trough,ro,mange,identifer
# ignore-words = "dev/tools/codespell/codespell-ignore.txt"
# exclude-file = "dev/tools/codespell/codespell-lines-ignore.txt"
uri-ignore-words-list="crate"
uri-ignore-words-list = "crate"
# For future reference: it is not currently possible to specify
# the standard dictionary and the custom dictionary in the configuration

72
sqlfluffrs/.gitignore vendored Normal file
View File

@@ -0,0 +1,72 @@
/target
# Byte-compiled / optimized / DLL files
__pycache__/
.pytest_cache/
*.py[cod]
# C extensions
*.so
# Distribution / packaging
.Python
.venv/
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
include/
man/
venv/
*.egg-info/
.installed.cfg
*.egg
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
pip-selfcheck.json
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml
# Translations
*.mo
# Mr Developer
.mr.developer.cfg
.project
.pydevproject
# Rope
.ropeproject
# Django stuff:
*.log
*.pot
.DS_Store
# Sphinx documentation
docs/_build/
# PyCharm
.idea/
# VSCode
.vscode/
# Pyenv
.python-version

731
sqlfluffrs/Cargo.lock generated Normal file
View File

@@ -0,0 +1,731 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "anstream"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
[[package]]
name = "anstyle-parse"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e"
dependencies = [
"anstyle",
"once_cell",
"windows-sys",
]
[[package]]
name = "arc-swap"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"
[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "bincode"
version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
dependencies = [
"serde",
]
[[package]]
name = "bit-set"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
[[package]]
name = "bumpalo"
version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "colorchoice"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
[[package]]
name = "either"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
[[package]]
name = "env_filter"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0"
dependencies = [
"log",
"regex",
]
[[package]]
name = "env_logger"
version = "0.11.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f"
dependencies = [
"anstream",
"anstyle",
"env_filter",
"jiff",
"log",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "fancy-regex"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "998b056554fbe42e03ae0e152895cd1a7e1002aec800fdc6635d20270260c46f"
dependencies = [
"bit-set",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "getrandom"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
dependencies = [
"cfg-if",
"libc",
"r-efi",
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash",
]
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "indoc"
version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"
[[package]]
name = "itertools"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "jiff"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49"
dependencies = [
"jiff-static",
"log",
"portable-atomic",
"portable-atomic-util",
"serde",
]
[[package]]
name = "jiff-static"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "js-sys"
version = "0.3.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "852f13bec5eba4ba9afbeb93fd7c13fe56147f055939ae21c43a29a0ecb2702e"
dependencies = [
"once_cell",
"wasm-bindgen",
]
[[package]]
name = "libc"
version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "log"
version = "0.4.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "memoffset"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
dependencies = [
"autocfg",
]
[[package]]
name = "once_cell"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
[[package]]
name = "portable-atomic"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
[[package]]
name = "portable-atomic-util"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
dependencies = [
"portable-atomic",
]
[[package]]
name = "proc-macro2"
version = "1.0.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
dependencies = [
"unicode-ident",
]
[[package]]
name = "pyo3"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383"
dependencies = [
"hashbrown",
"indoc",
"libc",
"memoffset",
"once_cell",
"portable-atomic",
"pyo3-build-config",
"pyo3-ffi",
"pyo3-macros",
"unindent",
"uuid",
]
[[package]]
name = "pyo3-build-config"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f"
dependencies = [
"target-lexicon",
]
[[package]]
name = "pyo3-ffi"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105"
dependencies = [
"libc",
"pyo3-build-config",
]
[[package]]
name = "pyo3-log"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "833e6fdc21553e9938d9443050ed3c7787ac3c1a1aefccbd03dfae0c7a4be529"
dependencies = [
"arc-swap",
"log",
"pyo3",
]
[[package]]
name = "pyo3-macros"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded"
dependencies = [
"proc-macro2",
"pyo3-macros-backend",
"quote",
"syn",
]
[[package]]
name = "pyo3-macros-backend"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf"
dependencies = [
"heck",
"proc-macro2",
"pyo3-build-config",
"quote",
"syn",
]
[[package]]
name = "quote"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
dependencies = [
"proc-macro2",
]
[[package]]
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
[[package]]
name = "regex"
version = "1.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
[[package]]
name = "ryu"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
[[package]]
name = "serde"
version = "1.0.225"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6c24dee235d0da097043389623fb913daddf92c76e9f5a1db88607a0bcbd1d"
dependencies = [
"serde_core",
"serde_derive",
]
[[package]]
name = "serde_core"
version = "1.0.225"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "659356f9a0cb1e529b24c01e43ad2bdf520ec4ceaf83047b83ddcc2251f96383"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.225"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ea936adf78b1f766949a4977b91d2f5595825bd6ec079aa9543ad2685fc4516"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
dependencies = [
"itoa",
"memchr",
"ryu",
"serde",
"serde_core",
]
[[package]]
name = "slotmap"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbff4acf519f630b3a3ddcfaea6c06b42174d9a44bc70c620e9ed1649d58b82a"
dependencies = [
"version_check",
]
[[package]]
name = "sqlfluffrs"
version = "0.1.0"
dependencies = [
"bincode",
"env_logger",
"fancy-regex",
"hashbrown",
"itertools",
"log",
"once_cell",
"pyo3",
"pyo3-log",
"regex",
"serde",
"serde_json",
"slotmap",
"uuid",
]
[[package]]
name = "syn"
version = "2.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "target-lexicon"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a"
[[package]]
name = "unicode-ident"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
[[package]]
name = "unindent"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "uuid"
version = "1.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
dependencies = [
"getrandom",
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "wasi"
version = "0.14.7+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c"
dependencies = [
"wasip2",
]
[[package]]
name = "wasip2"
version = "1.0.1+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
dependencies = [
"wit-bindgen",
]
[[package]]
name = "wasm-bindgen"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab10a69fbd0a177f5f649ad4d8d3305499c42bab9aef2f7ff592d0ec8f833819"
dependencies = [
"cfg-if",
"once_cell",
"rustversion",
"wasm-bindgen-macro",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bb702423545a6007bbc368fde243ba47ca275e549c8a28617f56f6ba53b1d1c"
dependencies = [
"bumpalo",
"log",
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc65f4f411d91494355917b605e1480033152658d71f722a90647f56a70c88a0"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffc003a991398a8ee604a401e194b6b3a39677b3173d6e74495eb51b82e99a32"
dependencies = [
"proc-macro2",
"quote",
"syn",
"wasm-bindgen-backend",
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "293c37f4efa430ca14db3721dfbe48d8c33308096bd44d80ebaa775ab71ba1cf"
dependencies = [
"unicode-ident",
]
[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "wit-bindgen"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"

32
sqlfluffrs/Cargo.toml Normal file
View File

@@ -0,0 +1,32 @@
[package]
name = "sqlfluffrs"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "sqlfluffrs"
crate-type = ["cdylib"]
[features]
unicode = []
python = ["unicode", "pyo3"]
[dependencies]
env_logger = "0.11.8"
fancy-regex = "0.16.2"
hashbrown = "0.15.5"
itertools = "0.14.0"
log = "0.4.28"
once_cell = "1.21.3"
pyo3 = { version = "0.26.0", optional = true, features = ["hashbrown", "extension-module", "uuid"] }
pyo3-log = { version = "0.13.0", optional = true }
regex = { version = "1.11.2", features = ["perf"] }
slotmap = "1.0.7"
uuid = { version = "1.18.1", features = ["v4"] }
serde = { version = "1.0.225", features = ["derive"] }
serde_json = "1.0.145"
bincode = "1.3.3"
[dev-dependencies]
env_logger = "0.11.6"

21
sqlfluffrs/LICENSE.md Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 Alan Cruickshank
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

20
sqlfluffrs/README.md Normal file
View File

@@ -0,0 +1,20 @@
# SQLFluff-rs
This package is an optional installation for [SQLFluff](https://github.com/sqlfluff/sqlfluff) and is **not** intended to be used as a standalone linting solution.
## Purpose
SQLFluff-rs serves as a Rust-based component that can be integrated with the main SQLFluff package. It is currently in development and should be considered experimental.
## Installation
This package is installed automatically when SQLFluff is installed with the appropriate optional dependencies. Direct installation or standalone usage is not supported.
To install from pip:
```sh
pip install sqlfluff[rs]
```
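
As a rough sanity check that the extra installed the compiled extension (assuming the build succeeded for your platform), the module should be importable directly:

```python
# Minimal check, assuming `pip install sqlfluff[rs]` succeeded:
# the compiled extension is exposed as the `sqlfluffrs` module.
import sqlfluffrs

print(sqlfluffrs.__file__)  # path of the installed extension module
```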
## Development Status
This is a supplementary component and is not meant to replace or function independently of the main SQLFluff package. For SQL linting, please use the main [SQLFluff](https://github.com/sqlfluff/sqlfluff) package.

0
sqlfluffrs/py.typed Normal file
View File

43
sqlfluffrs/pyproject.toml Normal file
View File

@@ -0,0 +1,43 @@
[build-system]
requires = ["maturin>=1.8,<2.0"]
build-backend = "maturin"
[project]
name = "sqlfluffrs"
readme = { file = "README.md", content-type = "text/markdown" }
license = { file = "LICENSE.md" }
description = "The SQL Linter for Humans"
requires-python = ">=3.9"
classifiers = [
"Development Status :: 3 - Alpha",
"Environment :: Console",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Operating System :: Unix",
"Operating System :: POSIX",
"Operating System :: MacOS",
"Operating System :: Microsoft :: Windows",
"Programming Language :: Rust",
"Programming Language :: Python",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3.13",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
"Programming Language :: SQL",
"Topic :: Utilities",
"Topic :: Software Development :: Quality Assurance",
]
dynamic = ["version"]
[project.urls]
Homepage = "https://www.sqlfluff.com"
Documentation = "https://docs.sqlfluff.com"
Source = "https://github.com/sqlfluff/sqlfluff"
"Issue Tracker" = "https://github.com/sqlfluff/sqlfluff/issues"
[tool.maturin]
features = ["pyo3/extension-module", "python"]

143
sqlfluffrs/sqlfluffrs.pyi Normal file
View File

@@ -0,0 +1,143 @@
from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union
from uuid import UUID
if TYPE_CHECKING:
from sqlfluff.core.config import FluffConfig
from sqlfluff.core.parser.lexer import StringLexer
from sqlfluff.core.parser.segments import SourceFix
from sqlfluff.core.templaters import TemplatedFile
SerializedObject = dict[str, Union[str, int, bool, list["SerializedObject"]]]
TupleSerialisedSegment = tuple[str, Union[str, tuple["TupleSerialisedSegment", ...]]]
class Slice: ...
class RsRawFileSlice:
raw: str
slice_type: str
source_idx: int
block_idx: int
tag: Optional[str]
class RsTemplatedFileSlice:
slice_type: str
source_slice: Slice
templated_slice: Slice
class RsTemplatedFile:
source_str: str
fname: str
templated_str: str
sliced_file: List[RsTemplatedFileSlice]
raw_sliced: List[RsRawFileSlice]
class RsPositionMarker:
source_slice: slice
templated_slice: slice
templated_file: RsTemplatedFile
working_line_no: int
working_line_pos: int
class RsToken:
raw: str
pos_marker: RsPositionMarker
type: str
uuid: Optional[int]
source_fixes: Optional[list["SourceFix"]]
def raw_trimmed(self) -> str: ...
@property
def is_templated(self) -> bool: ...
@property
def is_code(self) -> bool: ...
@property
def is_meta(self) -> bool: ...
@property
def source_str(self) -> str: ...
@property
def block_type(self) -> str: ...
@property
def block_uuid(self) -> Optional[UUID]: ...
@property
def cache_key(self) -> str: ...
@property
def trim_start(self) -> Optional[tuple[str]]: ...
@property
def trim_chars(self) -> Optional[tuple[str]]: ...
@property
def quoted_value(self) -> Optional[tuple[str, int | str]]: ...
@property
def escape_replacements(self) -> Optional[list[tuple[str, str]]]: ...
def count_segments(self, raw_only: bool = False) -> int: ...
def get_type(self) -> str: ...
def recursive_crawl(
self,
seg_type: Tuple[str, ...],
recurse_into: bool,
no_recursive_seg_type: Optional[Union[str, List[str]]] = None,
allow_self: bool = True,
) -> List["RsToken"]: ...
def recursive_crawl_all(self, reverse: bool) -> List["RsToken"]: ...
@property
def segments(self) -> List["RsToken"]: ...
def path_to(self, other: "RsToken") -> List[Any]: ...
def get_start_loc(self) -> Tuple[int, int]: ...
def get_end_loc(self) -> Tuple[int, int]: ...
@property
def raw_segments(self) -> List["RsToken"]: ...
def copy(
self,
segments: Optional[List["RsToken"]] = None,
parent: Optional[Any] = None,
parent_idx: Optional[int] = None,
) -> "RsToken": ...
def edit(
self,
raw: Optional[str] = None,
source_fixes: Optional[List[Any]] = None,
) -> "RsToken": ...
def to_tuple(
self,
code_only: Optional[bool] = None,
show_raw: Optional[bool] = None,
include_meta: Optional[bool] = None,
) -> TupleSerialisedSegment: ...
def __repr__(self) -> str: ...
@property
def instance_types(self) -> List[str]: ...
class RsSQLLexerError:
desc: str
line_no: int
line_pos: int
ignore: bool
warning: bool
fatal: bool
def __init__(
self,
msg: Optional[str] = None,
pos: Optional[RsPositionMarker] = None,
line_no: int = 0,
line_pos: int = 0,
ignore: bool = False,
warning: bool = False,
fatal: bool = False,
) -> None: ...
def rule_code(self) -> str: ...
def rule_name(self) -> str: ...
def source_signature(self) -> Tuple[Tuple[str, int, int], str]: ...
def to_dict(self) -> SerializedObject: ...
def ignore_if_in(self, ignore_iterable: list[str]) -> None: ...
def warning_if_in(self, ignore_iterable: list[str]) -> None: ...
class RsLexer:
def __init__(
self,
config: Optional["FluffConfig"] = None,
last_resort_lexer: Optional["StringLexer"] = None,
dialect: Optional[str] = None,
): ...
def _lex(
self, lex_input: Union[str, "TemplatedFile"]
) -> Tuple[List[RsToken], List[Any]]: ...
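
For orientation, here is a minimal, hypothetical sketch of driving these bindings directly, based only on the stub signatures above. In practice the Rust lexer is wired in through SQLFluff's own lexer machinery and `_lex` is an internal method, so treat this as illustrative rather than a supported API:

```python
# Hypothetical usage sketch based on the sqlfluffrs.pyi stub above.
# Assumes the sqlfluffrs extension is installed (e.g. via `pip install sqlfluff[rs]`).
from sqlfluffrs import RsLexer

lexer = RsLexer(dialect="ansi")           # dialect selected by name, per __init__ above
tokens, errors = lexer._lex("SELECT 1;")  # (List[RsToken], List[...]) per the stub

for token in tokens:
    print(token.get_type(), repr(token.raw))
```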

View File

@@ -0,0 +1,44 @@
#[derive(Clone)]
pub struct FluffConfig {
pub dialect: Option<String>,
pub template_blocks_indent: bool,
}
impl FluffConfig {
pub fn new(dialect: Option<String>, template_blocks_indent: bool) -> Self {
Self {
dialect,
template_blocks_indent,
}
}
}
#[cfg(feature = "python")]
pub mod python {
use pyo3::{
prelude::*,
types::{PyDict, PyDictMethods},
};
use super::FluffConfig;
#[derive(Clone)]
pub struct PyFluffConfig(pub FluffConfig);
impl<'py> FromPyObject<'py> for PyFluffConfig {
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
let configs = ob.getattr("_configs")?;
let configs_dict = configs.downcast::<PyDict>()?;
let core = configs_dict.get_item("core").ok().flatten().unwrap();
let core_dict = core.downcast::<PyDict>()?;
let dialect = core_dict
.get_item("dialect")
.ok()
.flatten()
.and_then(|x| x.extract::<String>().ok());
Ok(Self(FluffConfig::new(dialect, true)))
}
}
}

View File

@@ -0,0 +1 @@
pub mod fluffconfig;

View File

@@ -0,0 +1,895 @@
/* This is a generated file! */
use once_cell::sync::Lazy;
use crate::matcher::{LexMatcher, extract_nested_block_comment};
use crate::token::Token;
use crate::token::config::TokenConfig;
use crate::regex::RegexModeGroup;
use crate::dialect::Dialect;
use hashbrown::HashSet;
pub static ANSI_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
"CASE".to_string(),
"CROSS".to_string(),
"FULL".to_string(),
"IGNORE".to_string(),
"INNER".to_string(),
"INTERVAL".to_string(),
"JOIN".to_string(),
"LEFT".to_string(),
"NATURAL".to_string(),
"NOT".to_string(),
"NULL".to_string(),
"ON".to_string(),
"ORDER".to_string(),
"OUTER".to_string(),
"PARTITION".to_string(),
"RESPECT".to_string(),
"RIGHT".to_string(),
"ROWS".to_string(),
"SELECT".to_string(),
"SET".to_string(),
"UNION".to_string(),
"USING".to_string(),
]});
pub static ANSI_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![
LexMatcher::regex_lexer(
Dialect::Ansi,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Ansi,
"inline_comment",
r#"(--|#)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
Some(vec![String::from("--"), String::from("#")]),
None,
None,
None,
None,
None,
|input| input.starts_with(['#','-','/']),
None,
),
LexMatcher::regex_lexer(
Dialect::Ansi,
"block_comment",
r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Ansi,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Ansi,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
None,
None,
None,
None,
None,
Some(extract_nested_block_comment),
|input| input.starts_with("/"),
None,
),
LexMatcher::regex_lexer(
Dialect::Ansi,
"single_quote",
r#"'([^'\\]|\\.|'')*'"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"'((?:[^'\\]|\\.|'')*)'"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\'|''"#.to_string(), r#"'"#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'\'', ..] => true, // Single quote case
[b'R' | b'r', b'\'', ..] => true, // r' or R'
[b'B' | b'b', b'\'', ..] => true, // b' or B'
[b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
[b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Ansi,
"double_quote",
r#""(""|[^"\\]|\\.)*""#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#""((?:[^"\\]|\\.)*)""#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\"|"""#.to_string(), r#"""#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'"', ..] => true, // Just a double quote
[b'R' | b'r', b'"', ..] => true, // r" or R"
[b'B' | b'b', b'"', ..] => true, // b" or B"
[b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
[b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Ansi,
"back_quote",
r#"`(?:[^`\\]|\\.)*`"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"`((?:[^`\\]|\\.)*)`"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\`"#.to_string(), r#"`"#.to_string())),
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Ansi,
"dollar_quote",
r#"\$(\w*)\$(.*?)\$\1\$"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
None,
None,
None,
|input| input.starts_with("$"),
None,
),
LexMatcher::regex_lexer(
Dialect::Ansi,
"numeric_literal",
r#"(?>\d+\.\d+|\d+\.(?![\.\w])|\.\d+|\d+)(\.?[eE][+-]?\d+)?((?<=\.)|(?=\b))"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::literal_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|input| input.starts_with(['x','X','.','0','1','2','3','4','5','6','7','8','9']),
None,
),
LexMatcher::regex_lexer(
Dialect::Ansi,
"obevo_annotation",
r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"glob_operator",
"~~~",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Ansi,
"like_operator",
r#"!?~~?\*?"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Ansi,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"casting_operator",
"::",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"equals",
"=",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"greater_than",
">",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"less_than",
"<",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"not",
"!",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"dot",
".",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"comma",
",",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"plus",
"+",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"minus",
"-",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"divide",
"/",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"percent",
"%",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"question",
"?",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"ampersand",
"&",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"vertical_bar",
"|",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"caret",
"^",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"star",
"*",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"start_bracket",
"(",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"end_bracket",
")",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"start_square_bracket",
"[",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"end_square_bracket",
"]",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"start_curly_bracket",
"{",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"end_curly_bracket",
"}",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"colon",
":",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Ansi,
"semicolon",
";",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Ansi,
"word",
r#"[0-9a-zA-Z_]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::word_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
]});

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

View File

@@ -0,0 +1,916 @@
/* This is a generated file! */
use once_cell::sync::Lazy;
use crate::matcher::{LexMatcher, extract_nested_block_comment};
use crate::token::Token;
use crate::token::config::TokenConfig;
use crate::regex::RegexModeGroup;
use crate::dialect::Dialect;
use hashbrown::HashSet;
pub static CLICKHOUSE_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
"CASE".to_string(),
"CROSS".to_string(),
"FULL".to_string(),
"IGNORE".to_string(),
"INNER".to_string(),
"INTERVAL".to_string(),
"JOIN".to_string(),
"LEFT".to_string(),
"NATURAL".to_string(),
"NOT".to_string(),
"NULL".to_string(),
"ON".to_string(),
"ORDER".to_string(),
"OUTER".to_string(),
"PARTITION".to_string(),
"RESPECT".to_string(),
"RIGHT".to_string(),
"ROWS".to_string(),
"SELECT".to_string(),
"SET".to_string(),
"UNION".to_string(),
"USING".to_string(),
]});
pub static CLICKHOUSE_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![
LexMatcher::regex_lexer(
Dialect::Clickhouse,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Clickhouse,
"inline_comment",
r#"(--|#)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
Some(vec![String::from("--"), String::from("#")]),
None,
None,
None,
None,
None,
|input| input.starts_with(['#','-','/']),
None,
),
LexMatcher::regex_lexer(
Dialect::Clickhouse,
"block_comment",
r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Clickhouse,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Clickhouse,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
None,
None,
None,
None,
None,
Some(extract_nested_block_comment),
|input| input.starts_with("/"),
None,
),
LexMatcher::regex_lexer(
Dialect::Clickhouse,
"single_quote",
r#"'([^'\\]|\\.|'')*'"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"'((?:[^'\\]|\\.|'')*)'"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\'|''"#.to_string(), r#"'"#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'\'', ..] => true, // Single quote case
[b'R' | b'r', b'\'', ..] => true, // r' or R'
[b'B' | b'b', b'\'', ..] => true, // b' or B'
[b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
[b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Clickhouse,
"double_quote",
r#""([^"\\]|""|\\.)*""#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#""((?:[^"\\]|""|\\.)*)""#.to_string(), RegexModeGroup::Index(1))),
Some((r#"(""|\\")"#.to_string(), r#"""#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'"', ..] => true, // Just a double quote
[b'R' | b'r', b'"', ..] => true, // r" or R"
[b'B' | b'b', b'"', ..] => true, // b" or B"
[b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
[b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Clickhouse,
"back_quote",
r#"`(?:[^`\\]|``|\\.)*`"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"`((?:[^`\\]|``|\\.)*)`"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"(``|\\`)"#.to_string(), r#"`"#.to_string())),
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Clickhouse,
"dollar_quote",
r#"\$(\w*)\$(.*?)\$\1\$"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
None,
None,
None,
|input| input.starts_with("$"),
None,
),
LexMatcher::regex_lexer(
Dialect::Clickhouse,
"numeric_literal",
r#"(?>\d+\.\d+|\d+\.(?![\.\w])|\.\d+|\d+)(\.?[eE][+-]?\d+)?((?<=\.)|(?=\b))"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::literal_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|input| input.starts_with(['x','X','.','0','1','2','3','4','5','6','7','8','9']),
None,
),
LexMatcher::regex_lexer(
Dialect::Clickhouse,
"obevo_annotation",
r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"glob_operator",
"~~~",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Clickhouse,
"like_operator",
r#"!?~~?\*?"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"lambda",
"->",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::symbol_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Clickhouse,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"casting_operator",
"::",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"equals",
"=",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"greater_than",
">",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"less_than",
"<",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"not",
"!",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"dot",
".",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"comma",
",",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"plus",
"+",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"minus",
"-",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"divide",
"/",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"percent",
"%",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"question",
"?",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"ampersand",
"&",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"vertical_bar",
"|",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"caret",
"^",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"star",
"*",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"start_bracket",
"(",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"end_bracket",
")",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"start_square_bracket",
"[",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"end_square_bracket",
"]",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"start_curly_bracket",
"{",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"end_curly_bracket",
"}",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"colon",
":",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Clickhouse,
"semicolon",
";",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Clickhouse,
"word",
r#"[0-9a-zA-Z_]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::word_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
]});
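A minimal, hypothetical smoke test for the generated tables above (not part of the generated file): forcing the lazily initialised statics once confirms that the Clickhouse keyword list and matcher table construct cleanly. The module and test names are illustrative assumptions.

#[cfg(test)]
mod clickhouse_tables_smoke {
    use super::{CLICKHOUSE_KEYWORDS, CLICKHOUSE_LEXERS};

    #[test]
    fn generated_tables_are_populated() {
        // Dereferencing the Lazy statics forces construction of both tables.
        assert!(CLICKHOUSE_KEYWORDS.iter().any(|kw| kw.as_str() == "SELECT"));
        assert!(!CLICKHOUSE_LEXERS.is_empty());
    }
}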

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

View File

@@ -0,0 +1,915 @@
/* This is a generated file! */
use once_cell::sync::Lazy;
use crate::matcher::{LexMatcher, extract_nested_block_comment};
use crate::token::Token;
use crate::token::config::TokenConfig;
use crate::regex::RegexModeGroup;
use crate::dialect::Dialect;
use hashbrown::HashSet;
pub static DB2_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
"CASE".to_string(),
"CROSS".to_string(),
"FULL".to_string(),
"IGNORE".to_string(),
"INNER".to_string(),
"INTERVAL".to_string(),
"JOIN".to_string(),
"LEFT".to_string(),
"NOT".to_string(),
"NULL".to_string(),
"ON".to_string(),
"ORDER".to_string(),
"OUTER".to_string(),
"PARTITION".to_string(),
"RESPECT".to_string(),
"RIGHT".to_string(),
"ROWS".to_string(),
"SELECT".to_string(),
"SET".to_string(),
"UNION".to_string(),
"USING".to_string(),
]});
pub static DB2_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![
LexMatcher::regex_lexer(
Dialect::Db2,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Db2,
"inline_comment",
r#"(--)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
Some(vec![String::from("-"), String::from("-")]),
None,
None,
None,
None,
None,
|input| input.starts_with(['#','-','/']),
None,
),
LexMatcher::regex_lexer(
Dialect::Db2,
"block_comment",
r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Db2,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Db2,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
None,
None,
None,
None,
None,
Some(extract_nested_block_comment),
|input| input.starts_with("/"),
None,
),
LexMatcher::regex_lexer(
Dialect::Db2,
"single_quote",
r#"'((?:[^']|'')*)'"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"'((?:[^']|'')*)'"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"''"#.to_string(), r#"'"#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'\'', ..] => true, // Single quote case
[b'R' | b'r', b'\'', ..] => true, // r' or R'
[b'B' | b'b', b'\'', ..] => true, // b' or B'
[b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
[b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Db2,
"double_quote",
r#""((?:[^"]|"")*)""#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#""((?:[^"]|"")*)""#.to_string(), RegexModeGroup::Index(1))),
Some((r#""""#.to_string(), r#"""#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'"', ..] => true, // Just a double quote
[b'R' | b'r', b'"', ..] => true, // r" or R"
[b'B' | b'b', b'"', ..] => true, // b" or B"
[b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
[b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Db2,
"back_quote",
r#"`(?:[^`\\]|\\.)*`"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"`((?:[^`\\]|\\.)*)`"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\`"#.to_string(), r#"`"#.to_string())),
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Db2,
"dollar_quote",
r#"\$(\w*)\$(.*?)\$\1\$"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
None,
None,
None,
|input| input.starts_with("$"),
None,
),
LexMatcher::regex_lexer(
Dialect::Db2,
"numeric_literal",
r#"(?>\d+\.\d+|\d+\.(?![\.\w])|\.\d+|\d+)(\.?[eE][+-]?\d+)?((?<=\.)|(?=\b))"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::literal_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|input| input.starts_with(['x','X','.','0','1','2','3','4','5','6','7','8','9']),
None,
),
LexMatcher::regex_lexer(
Dialect::Db2,
"obevo_annotation",
r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"glob_operator",
"~~~",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Db2,
"like_operator",
r#"!?~~?\*?"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Db2,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"casting_operator",
"::",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"right_arrow",
"=>",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"equals",
"=",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"greater_than",
">",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"less_than",
"<",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"not",
"!",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"dot",
".",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"comma",
",",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"plus",
"+",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"minus",
"-",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"divide",
"/",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"percent",
"%",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"question",
"?",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"ampersand",
"&",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"vertical_bar",
"|",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"caret",
"^",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"star",
"*",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"start_bracket",
"(",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"end_bracket",
")",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"start_square_bracket",
"[",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"end_square_bracket",
"]",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"start_curly_bracket",
"{",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"end_curly_bracket",
"}",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"colon",
":",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Db2,
"semicolon",
";",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Db2,
"word",
r#"[0-9a-zA-Z_#]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::word_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
]});
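A hypothetical usage sketch (not part of the generated file): a caller would typically select a matcher table by dialect before lexing. The helper name and the empty-slice fallback below are assumptions for illustration; the real crate may expose this dispatch differently.

fn db2_matchers(dialect: Dialect) -> &'static [LexMatcher] {
    // DB2_LEXERS is the Lazy static defined above; dereferencing a static
    // Lazy yields a 'static slice of the generated matchers.
    match dialect {
        Dialect::Db2 => DB2_LEXERS.as_slice(),
        _ => &[],
    }
}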

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

View File

@@ -0,0 +1,996 @@
/* This is a generated file! */
use once_cell::sync::Lazy;
use crate::matcher::{LexMatcher, extract_nested_block_comment};
use crate::token::Token;
use crate::token::config::TokenConfig;
use crate::regex::RegexModeGroup;
use crate::dialect::Dialect;
use hashbrown::HashSet;
pub static FLINK_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
"ALL".to_string(),
"AND".to_string(),
"ANY".to_string(),
"AS".to_string(),
"AUTHORIZATION".to_string(),
"BETWEEN".to_string(),
"BIGINT".to_string(),
"BINARY".to_string(),
"BOOLEAN".to_string(),
"BOTH".to_string(),
"BY".to_string(),
"CASE".to_string(),
"CAST".to_string(),
"CHAR".to_string(),
"CHARACTER".to_string(),
"CHECK".to_string(),
"COLLATE".to_string(),
"COLUMN".to_string(),
"CONSTRAINT".to_string(),
"CREATE".to_string(),
"CROSS".to_string(),
"CURRENT_DATE".to_string(),
"CURRENT_TIME".to_string(),
"CURRENT_TIMESTAMP".to_string(),
"CURRENT_USER".to_string(),
"CURSOR".to_string(),
"DATE".to_string(),
"DAY".to_string(),
"DECIMAL".to_string(),
"DECLARE".to_string(),
"DELETE".to_string(),
"DESC".to_string(),
"DISTINCT".to_string(),
"DOUBLE".to_string(),
"DROP".to_string(),
"ELSE".to_string(),
"END".to_string(),
"ESCAPE".to_string(),
"EXCEPT".to_string(),
"EXISTS".to_string(),
"EXTRACT".to_string(),
"FALSE".to_string(),
"FETCH".to_string(),
"FILTER".to_string(),
"FLOAT".to_string(),
"FOR".to_string(),
"FOREIGN".to_string(),
"FROM".to_string(),
"FULL".to_string(),
"FUNCTION".to_string(),
"GRANT".to_string(),
"GROUP".to_string(),
"HAVING".to_string(),
"HOUR".to_string(),
"IF".to_string(),
"IGNORE".to_string(),
"IN".to_string(),
"INNER".to_string(),
"INSERT".to_string(),
"INT".to_string(),
"INTEGER".to_string(),
"INTERSECT".to_string(),
"INTERVAL".to_string(),
"INTO".to_string(),
"IS".to_string(),
"JOIN".to_string(),
"LEADING".to_string(),
"LEFT".to_string(),
"LIKE".to_string(),
"LIMIT".to_string(),
"LOCAL".to_string(),
"MINUTE".to_string(),
"MONTH".to_string(),
"NATURAL".to_string(),
"NOT".to_string(),
"NULL".to_string(),
"NUMERIC".to_string(),
"OF".to_string(),
"ON".to_string(),
"ONLY".to_string(),
"OR".to_string(),
"ORDER".to_string(),
"OUTER".to_string(),
"OVERLAPS".to_string(),
"OVERLAY".to_string(),
"PARTITION".to_string(),
"POSITION".to_string(),
"PRIMARY".to_string(),
"REAL".to_string(),
"REFERENCES".to_string(),
"RESPECT".to_string(),
"RIGHT".to_string(),
"ROW".to_string(),
"ROWS".to_string(),
"SECOND".to_string(),
"SELECT".to_string(),
"SESSION_USER".to_string(),
"SET".to_string(),
"SMALLINT".to_string(),
"SOME".to_string(),
"SUBSTRING".to_string(),
"TABLE".to_string(),
"THEN".to_string(),
"TIME".to_string(),
"TIMESTAMP".to_string(),
"TINYINT".to_string(),
"TO".to_string(),
"TRAILING".to_string(),
"TRUE".to_string(),
"UNION".to_string(),
"UNIQUE".to_string(),
"UNKNOWN".to_string(),
"UPDATE".to_string(),
"USER".to_string(),
"USING".to_string(),
"VALUES".to_string(),
"VARCHAR".to_string(),
"WHEN".to_string(),
"WHERE".to_string(),
"WITH".to_string(),
"YEAR".to_string(),
]});
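// Hypothetical sketch (not part of the generated file): the keyword list above
// is usually consulted as a set. `Lazy` and `hashbrown::HashSet` are already
// imported at the top of this file; the helper name and the uppercase
// normalisation are assumptions for illustration only.
fn is_flink_keyword(word: &str) -> bool {
    // Build the lookup set once, on first use.
    static KEYWORD_SET: Lazy<HashSet<String>> = Lazy::new(|| {
        FLINK_KEYWORDS.iter().map(|kw| kw.to_uppercase()).collect()
    });
    KEYWORD_SET.contains(&word.to_uppercase())
}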
pub static FLINK_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![
LexMatcher::regex_lexer(
Dialect::Flink,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Flink,
"inline_comment",
r#"(--)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
Some(vec![String::from("-"), String::from("-")]),
None,
None,
None,
None,
None,
|input| input.starts_with(['#','-','/']),
None,
),
LexMatcher::regex_lexer(
Dialect::Flink,
"block_comment",
r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Flink,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Flink,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
None,
None,
None,
None,
None,
Some(extract_nested_block_comment),
|input| input.starts_with("/"),
None,
),
LexMatcher::regex_lexer(
Dialect::Flink,
"single_quote",
r#"'([^'\\]|\\.)*'"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"'((?:[^'\\]|\\.)*)'"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"''"#.to_string(), r#"'"#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'\'', ..] => true, // Single quote case
[b'R' | b'r', b'\'', ..] => true, // r' or R'
[b'B' | b'b', b'\'', ..] => true, // b' or B'
[b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
[b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Flink,
"double_quote",
r#""(""|[^"\\]|\\.)*""#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#""((?:[^"\\]|\\.)*)""#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\"|"""#.to_string(), r#"""#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'"', ..] => true, // Just a double quote
[b'R' | b'r', b'"', ..] => true, // r" or R"
[b'B' | b'b', b'"', ..] => true, // b" or B"
[b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
[b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Flink,
"back_quote",
r#"`([^`]|``)*`"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"`((?:[^`]|``)*)`"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"``"#.to_string(), r#"`"#.to_string())),
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Flink,
"dollar_quote",
r#"\$(\w*)\$(.*?)\$\1\$"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
None,
None,
None,
|input| input.starts_with("$"),
None,
),
LexMatcher::regex_lexer(
Dialect::Flink,
"numeric_literal",
r#"(?>(?>\d+\.\d+|\d+\.|\.\d+)([eE][+-]?\d+)?([dDfF]|BD|bd)?|\d+[eE][+-]?\d+([dDfF]|BD|bd)?|\d+([dDfFlLsSyY]|BD|bd)?)((?<=\.)|(?=\b))"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|input| input.starts_with(['x','X','.','0','1','2','3','4','5','6','7','8','9']),
None,
),
LexMatcher::regex_lexer(
Dialect::Flink,
"obevo_annotation",
r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"glob_operator",
"~~~",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Flink,
"like_operator",
r#"!?~~?\*?"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Flink,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"casting_operator",
"::",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Flink,
"equals",
r#"==|="#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"greater_than",
">",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"less_than",
"<",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"not",
"!",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"dot",
".",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"comma",
",",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"plus",
"+",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"minus",
"-",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"divide",
"/",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"percent",
"%",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"question",
"?",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"ampersand",
"&",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"vertical_bar",
"|",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"caret",
"^",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"star",
"*",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"start_bracket",
"(",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"end_bracket",
")",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"start_square_bracket",
"[",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"end_square_bracket",
"]",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"start_curly_bracket",
"{",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"end_curly_bracket",
"}",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"colon",
":",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Flink,
"semicolon",
";",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Flink,
"word",
r#"[0-9a-zA-Z_]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::word_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
]});
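(Editor's note: illustration only, not part of the generated file.) The matchers in the table above are tried in order, which is why the more specific entries come first and the catch-all "word" matcher comes last; the same convention elsewhere in this commit lists multi-character literals such as "::" ahead of their single-character prefixes like ":". A minimal first-match-wins sketch of that idea, using plain string literals rather than the crate's LexMatcher API:

fn main() {
    // Ordered (name, literal) pairs: the longer "::" must be tried before ":".
    let matchers = [("casting_operator", "::"), ("colon", ":"), ("dot", ".")];
    let input = "::INT";
    let hit = matchers.iter().find(|(_, lit)| input.starts_with(*lit));
    assert_eq!(hit.map(|(name, _)| *name), Some("casting_operator"));
}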

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

View File

@@ -0,0 +1,200 @@
/* This is a generated file! */
/* dialect mods */
pub mod ansi;
use crate::dialect::ansi::matcher::{ANSI_KEYWORDS, ANSI_LEXERS};
pub mod athena;
use crate::dialect::athena::matcher::{ATHENA_KEYWORDS, ATHENA_LEXERS};
pub mod bigquery;
use crate::dialect::bigquery::matcher::{BIGQUERY_KEYWORDS, BIGQUERY_LEXERS};
pub mod clickhouse;
use crate::dialect::clickhouse::matcher::{CLICKHOUSE_KEYWORDS, CLICKHOUSE_LEXERS};
pub mod databricks;
use crate::dialect::databricks::matcher::{DATABRICKS_KEYWORDS, DATABRICKS_LEXERS};
pub mod db2;
use crate::dialect::db2::matcher::{DB2_KEYWORDS, DB2_LEXERS};
pub mod doris;
use crate::dialect::doris::matcher::{DORIS_KEYWORDS, DORIS_LEXERS};
pub mod duckdb;
use crate::dialect::duckdb::matcher::{DUCKDB_KEYWORDS, DUCKDB_LEXERS};
pub mod exasol;
use crate::dialect::exasol::matcher::{EXASOL_KEYWORDS, EXASOL_LEXERS};
pub mod flink;
use crate::dialect::flink::matcher::{FLINK_KEYWORDS, FLINK_LEXERS};
pub mod greenplum;
use crate::dialect::greenplum::matcher::{GREENPLUM_KEYWORDS, GREENPLUM_LEXERS};
pub mod hive;
use crate::dialect::hive::matcher::{HIVE_KEYWORDS, HIVE_LEXERS};
pub mod impala;
use crate::dialect::impala::matcher::{IMPALA_KEYWORDS, IMPALA_LEXERS};
pub mod mariadb;
use crate::dialect::mariadb::matcher::{MARIADB_KEYWORDS, MARIADB_LEXERS};
pub mod materialize;
use crate::dialect::materialize::matcher::{MATERIALIZE_KEYWORDS, MATERIALIZE_LEXERS};
pub mod mysql;
use crate::dialect::mysql::matcher::{MYSQL_KEYWORDS, MYSQL_LEXERS};
pub mod oracle;
use crate::dialect::oracle::matcher::{ORACLE_KEYWORDS, ORACLE_LEXERS};
pub mod postgres;
use crate::dialect::postgres::matcher::{POSTGRES_KEYWORDS, POSTGRES_LEXERS};
pub mod redshift;
use crate::dialect::redshift::matcher::{REDSHIFT_KEYWORDS, REDSHIFT_LEXERS};
pub mod snowflake;
use crate::dialect::snowflake::matcher::{SNOWFLAKE_KEYWORDS, SNOWFLAKE_LEXERS};
pub mod soql;
use crate::dialect::soql::matcher::{SOQL_KEYWORDS, SOQL_LEXERS};
pub mod sparksql;
use crate::dialect::sparksql::matcher::{SPARKSQL_KEYWORDS, SPARKSQL_LEXERS};
pub mod sqlite;
use crate::dialect::sqlite::matcher::{SQLITE_KEYWORDS, SQLITE_LEXERS};
pub mod starrocks;
use crate::dialect::starrocks::matcher::{STARROCKS_KEYWORDS, STARROCKS_LEXERS};
pub mod teradata;
use crate::dialect::teradata::matcher::{TERADATA_KEYWORDS, TERADATA_LEXERS};
pub mod trino;
use crate::dialect::trino::matcher::{TRINO_KEYWORDS, TRINO_LEXERS};
pub mod tsql;
use crate::dialect::tsql::matcher::{TSQL_KEYWORDS, TSQL_LEXERS};
pub mod vertica;
use crate::dialect::vertica::matcher::{VERTICA_KEYWORDS, VERTICA_LEXERS};
use crate::matcher::LexMatcher;
use std::str::FromStr;
#[derive(Debug, Eq, PartialEq, Hash, Copy, Clone)]
pub enum Dialect {
Ansi,
Athena,
Bigquery,
Clickhouse,
Databricks,
Db2,
Doris,
Duckdb,
Exasol,
Flink,
Greenplum,
Hive,
Impala,
Mariadb,
Materialize,
Mysql,
Oracle,
Postgres,
Redshift,
Snowflake,
Soql,
Sparksql,
Sqlite,
Starrocks,
Teradata,
Trino,
Tsql,
Vertica,
}
impl Dialect {
pub(crate) fn get_reserved_keywords(&self) -> &'static Vec<String> {
match self {
Dialect::Ansi => &ANSI_KEYWORDS,
Dialect::Athena => &ATHENA_KEYWORDS,
Dialect::Bigquery => &BIGQUERY_KEYWORDS,
Dialect::Clickhouse => &CLICKHOUSE_KEYWORDS,
Dialect::Databricks => &DATABRICKS_KEYWORDS,
Dialect::Db2 => &DB2_KEYWORDS,
Dialect::Doris => &DORIS_KEYWORDS,
Dialect::Duckdb => &DUCKDB_KEYWORDS,
Dialect::Exasol => &EXASOL_KEYWORDS,
Dialect::Flink => &FLINK_KEYWORDS,
Dialect::Greenplum => &GREENPLUM_KEYWORDS,
Dialect::Hive => &HIVE_KEYWORDS,
Dialect::Impala => &IMPALA_KEYWORDS,
Dialect::Mariadb => &MARIADB_KEYWORDS,
Dialect::Materialize => &MATERIALIZE_KEYWORDS,
Dialect::Mysql => &MYSQL_KEYWORDS,
Dialect::Oracle => &ORACLE_KEYWORDS,
Dialect::Postgres => &POSTGRES_KEYWORDS,
Dialect::Redshift => &REDSHIFT_KEYWORDS,
Dialect::Snowflake => &SNOWFLAKE_KEYWORDS,
Dialect::Soql => &SOQL_KEYWORDS,
Dialect::Sparksql => &SPARKSQL_KEYWORDS,
Dialect::Sqlite => &SQLITE_KEYWORDS,
Dialect::Starrocks => &STARROCKS_KEYWORDS,
Dialect::Teradata => &TERADATA_KEYWORDS,
Dialect::Trino => &TRINO_KEYWORDS,
Dialect::Tsql => &TSQL_KEYWORDS,
Dialect::Vertica => &VERTICA_KEYWORDS,
}
}
pub fn get_lexers(&self) -> &'static Vec<LexMatcher> {
match self {
Dialect::Ansi => &ANSI_LEXERS,
Dialect::Athena => &ATHENA_LEXERS,
Dialect::Bigquery => &BIGQUERY_LEXERS,
Dialect::Clickhouse => &CLICKHOUSE_LEXERS,
Dialect::Databricks => &DATABRICKS_LEXERS,
Dialect::Db2 => &DB2_LEXERS,
Dialect::Doris => &DORIS_LEXERS,
Dialect::Duckdb => &DUCKDB_LEXERS,
Dialect::Exasol => &EXASOL_LEXERS,
Dialect::Flink => &FLINK_LEXERS,
Dialect::Greenplum => &GREENPLUM_LEXERS,
Dialect::Hive => &HIVE_LEXERS,
Dialect::Impala => &IMPALA_LEXERS,
Dialect::Mariadb => &MARIADB_LEXERS,
Dialect::Materialize => &MATERIALIZE_LEXERS,
Dialect::Mysql => &MYSQL_LEXERS,
Dialect::Oracle => &ORACLE_LEXERS,
Dialect::Postgres => &POSTGRES_LEXERS,
Dialect::Redshift => &REDSHIFT_LEXERS,
Dialect::Snowflake => &SNOWFLAKE_LEXERS,
Dialect::Soql => &SOQL_LEXERS,
Dialect::Sparksql => &SPARKSQL_LEXERS,
Dialect::Sqlite => &SQLITE_LEXERS,
Dialect::Starrocks => &STARROCKS_LEXERS,
Dialect::Teradata => &TERADATA_LEXERS,
Dialect::Trino => &TRINO_LEXERS,
Dialect::Tsql => &TSQL_LEXERS,
Dialect::Vertica => &VERTICA_LEXERS,
}
}
}
impl FromStr for Dialect {
type Err = ();
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"ansi" => Ok(Dialect::Ansi),
"athena" => Ok(Dialect::Athena),
"bigquery" => Ok(Dialect::Bigquery),
"clickhouse" => Ok(Dialect::Clickhouse),
"databricks" => Ok(Dialect::Databricks),
"db2" => Ok(Dialect::Db2),
"doris" => Ok(Dialect::Doris),
"duckdb" => Ok(Dialect::Duckdb),
"exasol" => Ok(Dialect::Exasol),
"flink" => Ok(Dialect::Flink),
"greenplum" => Ok(Dialect::Greenplum),
"hive" => Ok(Dialect::Hive),
"impala" => Ok(Dialect::Impala),
"mariadb" => Ok(Dialect::Mariadb),
"materialize" => Ok(Dialect::Materialize),
"mysql" => Ok(Dialect::Mysql),
"oracle" => Ok(Dialect::Oracle),
"postgres" => Ok(Dialect::Postgres),
"redshift" => Ok(Dialect::Redshift),
"snowflake" => Ok(Dialect::Snowflake),
"soql" => Ok(Dialect::Soql),
"sparksql" => Ok(Dialect::Sparksql),
"sqlite" => Ok(Dialect::Sqlite),
"starrocks" => Ok(Dialect::Starrocks),
"teradata" => Ok(Dialect::Teradata),
"trino" => Ok(Dialect::Trino),
"tsql" => Ok(Dialect::Tsql),
"vertica" => Ok(Dialect::Vertica),
_ => Err(()),
}
}
}
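(Editor's note: illustration only, not part of the generated file.) The registry above is the lookup point from a dialect label to its generated matcher table and keyword list. A minimal usage sketch relying only on items defined in this module (FromStr is already imported above):

fn lexer_count(name: &str) -> Option<usize> {
    // "flink", "soql", etc. are the labels accepted by Dialect::from_str.
    let dialect = Dialect::from_str(name).ok()?;
    // get_lexers returns the dialect's ordered table of LexMatchers.
    Some(dialect.get_lexers().len())
}
// e.g. lexer_count("flink") returns the size of the Flink table,
// while lexer_count("not-a-dialect") returns None.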

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

View File

@@ -0,0 +1,978 @@
/* This is a generated file! */
use once_cell::sync::Lazy;
use crate::matcher::{LexMatcher, extract_nested_block_comment};
use crate::token::Token;
use crate::token::config::TokenConfig;
use crate::regex::RegexModeGroup;
use crate::dialect::Dialect;
use hashbrown::HashSet;
pub static SOQL_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
"CASE".to_string(),
"CROSS".to_string(),
"FULL".to_string(),
"IGNORE".to_string(),
"INNER".to_string(),
"INTERVAL".to_string(),
"JOIN".to_string(),
"LAST_90_DAYS".to_string(),
"LAST_FISCAL_QUARTER".to_string(),
"LAST_FISCAL_YEAR".to_string(),
"LAST_MONTH".to_string(),
"LAST_N_DAYS".to_string(),
"LAST_N_FISCAL_QUARTERS".to_string(),
"LAST_N_FISCAL_YEARS".to_string(),
"LAST_N_MONTHS".to_string(),
"LAST_N_QUARTERS".to_string(),
"LAST_N_WEEKS".to_string(),
"LAST_N_YEARS".to_string(),
"LAST_QUARTER".to_string(),
"LAST_WEEK".to_string(),
"LAST_YEAR".to_string(),
"LEFT".to_string(),
"NATURAL".to_string(),
"NEXT_90_DAYS".to_string(),
"NEXT_FISCAL_QUARTER".to_string(),
"NEXT_FISCAL_YEAR".to_string(),
"NEXT_MONTH".to_string(),
"NEXT_N_DAYS".to_string(),
"NEXT_N_FISCAL_QUARTERS".to_string(),
"NEXT_N_FISCAL_YEARS".to_string(),
"NEXT_N_MONTHS".to_string(),
"NEXT_N_QUARTERS".to_string(),
"NEXT_N_WEEKS".to_string(),
"NEXT_N_YEARS".to_string(),
"NEXT_QUARTER".to_string(),
"NEXT_WEEK".to_string(),
"NEXT_YEAR".to_string(),
"NOT".to_string(),
"NULL".to_string(),
"ON".to_string(),
"ORDER".to_string(),
"OUTER".to_string(),
"PARTITION".to_string(),
"RESPECT".to_string(),
"RIGHT".to_string(),
"ROWS".to_string(),
"SELECT".to_string(),
"SET".to_string(),
"THIS_FISCAL_QUARTER".to_string(),
"THIS_FISCAL_YEAR".to_string(),
"THIS_MONTH".to_string(),
"THIS_QUARTER".to_string(),
"THIS_WEEK".to_string(),
"THIS_YEAR".to_string(),
"TODAY".to_string(),
"TOMORROW".to_string(),
"UNION".to_string(),
"USING".to_string(),
"YESTERDAY".to_string(),
]});
pub static SOQL_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![
LexMatcher::regex_lexer(
Dialect::Soql,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Soql,
"inline_comment",
r#"(--|#)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
Some(vec![String::from("--"), String::from("#")]),
None,
None,
None,
None,
None,
|input| input.starts_with(['#','-','/']),
None,
),
LexMatcher::regex_lexer(
Dialect::Soql,
"block_comment",
r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Soql,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Soql,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
None,
None,
None,
None,
None,
Some(extract_nested_block_comment),
|input| input.starts_with("/"),
None,
),
LexMatcher::regex_lexer(
Dialect::Soql,
"single_quote",
r#"'([^'\\]|\\.|'')*'"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"'((?:[^'\\]|\\.|'')*)'"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\'|''"#.to_string(), r#"'"#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'\'', ..] => true, // Single quote case
[b'R' | b'r', b'\'', ..] => true, // r' or R'
[b'B' | b'b', b'\'', ..] => true, // b' or B'
[b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
[b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Soql,
"double_quote",
r#""(""|[^"\\]|\\.)*""#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#""((?:[^"\\]|\\.)*)""#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\"|"""#.to_string(), r#"""#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'"', ..] => true, // Just a double quote
[b'R' | b'r', b'"', ..] => true, // r" or R"
[b'B' | b'b', b'"', ..] => true, // b" or B"
[b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
[b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Soql,
"back_quote",
r#"`(?:[^`\\]|\\.)*`"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"`((?:[^`\\]|\\.)*)`"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\`"#.to_string(), r#"`"#.to_string())),
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Soql,
"dollar_quote",
r#"\$(\w*)\$(.*?)\$\1\$"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
None,
None,
None,
|input| input.starts_with("$"),
None,
),
LexMatcher::regex_lexer(
Dialect::Soql,
"datetime_literal",
r#"[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(Z|(\+|\-)[0-9]{2}:[0-9]{2})"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Soql,
"date_literal",
r#"[0-9]{4}-[0-9]{2}-[0-9]{2}"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Soql,
"numeric_literal",
r#"(?>\d+\.\d+|\d+\.(?![\.\w])|\.\d+|\d+)(\.?[eE][+-]?\d+)?((?<=\.)|(?=\b))"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::literal_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|input| input.starts_with(['x','X','.','0','1','2','3','4','5','6','7','8','9']),
None,
),
LexMatcher::regex_lexer(
Dialect::Soql,
"obevo_annotation",
r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"glob_operator",
"~~~",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Soql,
"like_operator",
r#"!?~~?\*?"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Soql,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"casting_operator",
"::",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"equals",
"=",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"greater_than",
">",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"less_than",
"<",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"not",
"!",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"dot",
".",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"comma",
",",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"plus",
"+",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"minus",
"-",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"divide",
"/",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"percent",
"%",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"question",
"?",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"ampersand",
"&",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"vertical_bar",
"|",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"caret",
"^",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"star",
"*",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"start_bracket",
"(",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"end_bracket",
")",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"start_square_bracket",
"[",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"end_square_bracket",
"]",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"start_curly_bracket",
"{",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"end_curly_bracket",
"}",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"colon",
":",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Soql,
"semicolon",
";",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Soql,
"word",
r#"[0-9a-zA-Z_]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::word_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
]});
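(Editor's note: illustration only, not part of the generated file.) The quoted_value / escape_replacement pairs on the quote matchers above appear to describe, first, a capture group isolating the text between the quotes and, second, a pattern and replacement that collapse the escaped quote forms. A rough sketch of that behaviour using the external regex crate (assumed here purely for illustration; the crate's own regex wrapper and RegexModeGroup handling may differ):

fn unquote_single(raw: &str) -> Option<String> {
    // Group 1 of the quoted_value pattern holds the text between the quotes.
    let value_re = regex::Regex::new(r"'((?:[^'\\]|\\.|'')*)'").ok()?;
    let inner = value_re.captures(raw)?.get(1)?.as_str();
    // The escape_replacement pair maps \' and '' back to a literal quote.
    let escape_re = regex::Regex::new(r"\\'|''").ok()?;
    Some(escape_re.replace_all(inner, "'").into_owned())
}
// e.g. unquote_single(r"'it''s'") yields Some(String::from("it's")).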

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

View File

@@ -0,0 +1,898 @@
/* This is a generated file! */
use once_cell::sync::Lazy;
use crate::matcher::{LexMatcher, extract_nested_block_comment};
use crate::token::Token;
use crate::token::config::TokenConfig;
use crate::regex::RegexModeGroup;
use crate::dialect::Dialect;
use hashbrown::HashSet;
pub static TERADATA_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
"CASE".to_string(),
"CROSS".to_string(),
"FULL".to_string(),
"IGNORE".to_string(),
"INNER".to_string(),
"INTERVAL".to_string(),
"JOIN".to_string(),
"LEFT".to_string(),
"LOCKING".to_string(),
"NATURAL".to_string(),
"NOT".to_string(),
"NULL".to_string(),
"ON".to_string(),
"ORDER".to_string(),
"OUTER".to_string(),
"PARTITION".to_string(),
"REPLACE".to_string(),
"RESPECT".to_string(),
"RIGHT".to_string(),
"ROWS".to_string(),
"SELECT".to_string(),
"SET".to_string(),
"TIMESTAMP".to_string(),
"UNION".to_string(),
"USING".to_string(),
]});
pub static TERADATA_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![
LexMatcher::regex_lexer(
Dialect::Teradata,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Teradata,
"inline_comment",
r#"(--|#)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
Some(vec![String::from("--"), String::from("#")]),
None,
None,
None,
None,
None,
|input| input.starts_with(['#','-','/']),
None,
),
LexMatcher::regex_lexer(
Dialect::Teradata,
"block_comment",
r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Teradata,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Teradata,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
None,
None,
None,
None,
None,
Some(extract_nested_block_comment),
|input| input.starts_with("/"),
None,
),
LexMatcher::regex_lexer(
Dialect::Teradata,
"single_quote",
r#"'([^'\\]|\\.|'')*'"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"'((?:[^'\\]|\\.|'')*)'"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\'|''"#.to_string(), r#"'"#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'\'', ..] => true, // Single quote case
[b'R' | b'r', b'\'', ..] => true, // r' or R'
[b'B' | b'b', b'\'', ..] => true, // b' or B'
[b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
[b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Teradata,
"double_quote",
r#""(""|[^"\\]|\\.)*""#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#""((?:[^"\\]|\\.)*)""#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\"|"""#.to_string(), r#"""#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'"', ..] => true, // Just a double quote
[b'R' | b'r', b'"', ..] => true, // r" or R"
[b'B' | b'b', b'"', ..] => true, // b" or B"
[b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
[b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Teradata,
"back_quote",
r#"`(?:[^`\\]|\\.)*`"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"`((?:[^`\\]|\\.)*)`"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\`"#.to_string(), r#"`"#.to_string())),
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Teradata,
"dollar_quote",
r#"\$(\w*)\$(.*?)\$\1\$"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
None,
None,
None,
|input| input.starts_with("$"),
None,
),
LexMatcher::regex_lexer(
Dialect::Teradata,
"numeric_literal",
r#"([0-9]+(\.[0-9]*)?)"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|input| input.starts_with(['x','X','.','0','1','2','3','4','5','6','7','8','9']),
None,
),
LexMatcher::regex_lexer(
Dialect::Teradata,
"obevo_annotation",
r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"glob_operator",
"~~~",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Teradata,
"like_operator",
r#"!?~~?\*?"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Teradata,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"casting_operator",
"::",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"equals",
"=",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"greater_than",
">",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"less_than",
"<",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"not",
"!",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"dot",
".",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"comma",
",",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"plus",
"+",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"minus",
"-",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"divide",
"/",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"percent",
"%",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"question",
"?",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"ampersand",
"&",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"vertical_bar",
"|",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"caret",
"^",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"star",
"*",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"start_bracket",
"(",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"end_bracket",
")",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"start_square_bracket",
"[",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"end_square_bracket",
"]",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"start_curly_bracket",
"{",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"end_curly_bracket",
"}",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"colon",
":",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Teradata,
"semicolon",
";",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Teradata,
"word",
r#"[0-9a-zA-Z_]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::word_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
]});
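(Editor's note: illustration only, not part of the generated file.) The closures such as |input| input.starts_with(['#','-','/']) attached to the regex matchers above act as cheap first-character guards, so the comparatively expensive regex only runs when the remaining input could plausibly match. The same pattern in isolation:

fn main() {
    // Guard mirroring the numeric_literal matcher above: only run the full
    // regex when the input starts with a digit, a dot, or x/X.
    let could_be_number = |input: &str| {
        input.starts_with(['x', 'X', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
    };
    assert!(could_be_number("42 * price"));
    assert!(!could_be_number("SELECT 1"));
}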

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

View File

@@ -0,0 +1,998 @@
/* This is a generated file! */
use once_cell::sync::Lazy;
use crate::matcher::{LexMatcher, extract_nested_block_comment};
use crate::token::Token;
use crate::token::config::TokenConfig;
use crate::regex::RegexModeGroup;
use crate::dialect::Dialect;
use hashbrown::HashSet;
pub static TRINO_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
"ALTER".to_string(),
"AND".to_string(),
"AS".to_string(),
"BETWEEN".to_string(),
"BY".to_string(),
"CASE".to_string(),
"CAST".to_string(),
"CONSTRAINT".to_string(),
"CREATE".to_string(),
"CROSS".to_string(),
"CUBE".to_string(),
"CURRENT_CATALOG".to_string(),
"CURRENT_DATE".to_string(),
"CURRENT_PATH".to_string(),
"CURRENT_ROLE".to_string(),
"CURRENT_SCHEMA".to_string(),
"CURRENT_TIME".to_string(),
"CURRENT_TIMESTAMP".to_string(),
"CURRENT_USER".to_string(),
"DEALLOCATE".to_string(),
"DELETE".to_string(),
"DESCRIBE".to_string(),
"DISTINCT".to_string(),
"DROP".to_string(),
"ELSE".to_string(),
"END".to_string(),
"ESCAPE".to_string(),
"EXCEPT".to_string(),
"EXECUTE".to_string(),
"EXISTS".to_string(),
"EXTRACT".to_string(),
"FALSE".to_string(),
"FOR".to_string(),
"FROM".to_string(),
"FULL".to_string(),
"FUNCTION".to_string(),
"GROUP".to_string(),
"GROUPING".to_string(),
"HAVING".to_string(),
"IN".to_string(),
"INNER".to_string(),
"INSERT".to_string(),
"INTERSECT".to_string(),
"INTO".to_string(),
"IS".to_string(),
"JOIN".to_string(),
"JSON_ARRAY".to_string(),
"JSON_EXISTS".to_string(),
"JSON_OBJECT".to_string(),
"JSON_QUERY".to_string(),
"JSON_TABLE".to_string(),
"JSON_VALUE".to_string(),
"LEFT".to_string(),
"LIKE".to_string(),
"LISTAGG".to_string(),
"LOCALTIME".to_string(),
"LOCALTIMESTAMP".to_string(),
"NATURAL".to_string(),
"NORMALIZE".to_string(),
"NOT".to_string(),
"NULL".to_string(),
"ON".to_string(),
"OR".to_string(),
"ORDER".to_string(),
"OUTER".to_string(),
"PREPARE".to_string(),
"RECURSIVE".to_string(),
"RIGHT".to_string(),
"ROLLUP".to_string(),
"SELECT".to_string(),
"SKIP".to_string(),
"TABLE".to_string(),
"THEN".to_string(),
"TRIM".to_string(),
"TRUE".to_string(),
"UESCAPE".to_string(),
"UNION".to_string(),
"UNNEST".to_string(),
"USING".to_string(),
"VALUES".to_string(),
"WHEN".to_string(),
"WHERE".to_string(),
"WITH".to_string(),
]});
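(Editor's note: illustration only, not part of the generated file.) The keyword vectors in these generated files store each dialect's reserved words in upper case, so a membership check on lexed identifiers presumably needs to normalise case first. A minimal sketch of such a check (the lookup strategy is an assumption, not the crate's actual code):

fn is_reserved(keywords: &[String], ident: &str) -> bool {
    // Keywords are stored upper-case; compare case-insensitively.
    let upper = ident.to_uppercase();
    keywords.iter().any(|kw| kw == &upper)
}
// e.g. is_reserved(&TRINO_KEYWORDS, "select") is true, while
// is_reserved(&TRINO_KEYWORDS, "customer_id") is false.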
pub static TRINO_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![
LexMatcher::regex_lexer(
Dialect::Trino,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Trino,
"inline_comment",
r#"(--|#)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
Some(vec![String::from("--"), String::from("#")]),
None,
None,
None,
None,
None,
|input| input.starts_with(['#','-','/']),
None,
),
LexMatcher::regex_lexer(
Dialect::Trino,
"block_comment",
r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Trino,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
Some(Box::new(
LexMatcher::regex_subdivider(
Dialect::Trino,
"whitespace",
r#"[^\S\r\n]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::whitespace_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
None,
None,
None,
None,
None,
Some(extract_nested_block_comment),
|input| input.starts_with("/"),
None,
),
LexMatcher::regex_lexer(
Dialect::Trino,
"single_quote",
r#"'([^'\\]|\\.|'')*'"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"'((?:[^'\\]|\\.|'')*)'"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\'|''"#.to_string(), r#"'"#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'\'', ..] => true, // Single quote case
[b'R' | b'r', b'\'', ..] => true, // r' or R'
[b'B' | b'b', b'\'', ..] => true, // b' or B'
[b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
[b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Trino,
"double_quote",
r#""([^"]|"")*""#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#""((?:[^"]|"")*)""#.to_string(), RegexModeGroup::Index(1))),
Some((r#""""#.to_string(), r#"""#.to_string())),
None,
None,
|input| match input.as_bytes() {
[b'"', ..] => true, // Just a double quote
[b'R' | b'r', b'"', ..] => true, // r" or R"
[b'B' | b'b', b'"', ..] => true, // b" or B"
[b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
[b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
_ => false,
},
None,
),
LexMatcher::regex_lexer(
Dialect::Trino,
"back_quote",
r#"`(?:[^`\\]|\\.)*`"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"`((?:[^`\\]|\\.)*)`"#.to_string(), RegexModeGroup::Index(1))),
Some((r#"\\`"#.to_string(), r#"`"#.to_string())),
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Trino,
"dollar_quote",
r#"\$(\w*)\$(.*?)\$\1\$"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
None,
None,
None,
|input| input.starts_with("$"),
None,
),
LexMatcher::regex_lexer(
Dialect::Trino,
"numeric_literal",
r#"(?>\d+\.\d+|\d+\.(?![\.\w])|\.\d+|\d+)(\.?[eE][+-]?\d+)?((?<=\.)|(?=\b))"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::literal_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|input| input.starts_with(['x','X','.','0','1','2','3','4','5','6','7','8','9']),
None,
),
LexMatcher::regex_lexer(
Dialect::Trino,
"obevo_annotation",
r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comment_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"glob_operator",
"~~~",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"right_arrow",
"->",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"fat_right_arrow",
"=>",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Trino,
"like_operator",
r#"!?~~?\*?"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
Dialect::Trino,
"newline",
r#"\r\n|\n"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::newline_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"casting_operator",
"::",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"equals",
"=",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"greater_than",
">",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"less_than",
"<",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"not",
"!",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"dot",
".",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"comma",
",",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"plus",
"+",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"minus",
"-",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"divide",
"/",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"percent",
"%",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"question",
"?",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"ampersand",
"&",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"vertical_bar",
"|",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"caret",
"^",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"star",
"*",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"start_bracket",
"(",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"end_bracket",
")",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"start_square_bracket",
"[",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"end_square_bracket",
"]",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"start_curly_bracket",
"{",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"end_curly_bracket",
"}",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"colon",
":",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::string_lexer(
Dialect::Trino,
"semicolon",
";",
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
),
LexMatcher::regex_lexer(
Dialect::Trino,
"word",
r#"[0-9a-zA-Z_]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::word_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
]});
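
The generated matcher lists such as the Trino one above are consumed in priority order: the lexer tries each LexMatcher against the remaining input, takes the first scan_match hit, and advances by the matched length. A minimal sketch of that driver loop follows (the function name and the silent fall-through on unmatched input are illustrative only; the actual loop presumably lives in lexer.rs, whose diff is suppressed below):

use sqlfluffrs::matcher::{LexMatcher, LexedElement};

// Hypothetical driver: walk the matcher list, take the first match each
// time, and advance the input by the matched byte length. Any unmatched
// remainder is handed back to the caller instead of raising, to keep the
// sketch short.
fn lex_raw<'a>(
    matchers: &'a [LexMatcher],
    mut input: &'a str,
) -> (Vec<LexedElement<'a>>, &'a str) {
    let mut elements = Vec::new();
    'outer: while !input.is_empty() {
        for matcher in matchers {
            if let Some((elems, consumed)) = matcher.scan_match(input) {
                elements.extend(elems);
                input = &input[consumed..];
                continue 'outer;
            }
        }
        break; // nothing matched: leave the remainder for error handling
    }
    (elements, input)
}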

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

File diff suppressed because it is too large

View File

@@ -0,0 +1,5 @@
/* This is a generated file! */
#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

1335
sqlfluffrs/src/lexer.rs Normal file

File diff suppressed because it is too large

14
sqlfluffrs/src/lib.rs Normal file
View File

@@ -0,0 +1,14 @@
pub mod config;
pub mod dialect;
pub mod lexer;
pub mod marker;
pub mod matcher;
#[cfg(feature = "python")]
pub mod python;
pub mod regex;
pub mod slice;
pub mod templater;
pub mod token;
// include!(concat!(env!("OUT_DIR"), "/dialect_matcher.rs"));
use crate::dialect::Dialect;

491
sqlfluffrs/src/marker.rs Normal file
View File

@@ -0,0 +1,491 @@
use hashbrown::HashMap;
use std::cmp::Ordering;
use std::fmt::Display;
use std::sync::Arc;
use crate::slice::Slice;
use crate::templater::templatefile::TemplatedFile;
#[derive(Debug, Clone)]
pub struct PositionMarker {
pub source_slice: Slice,
pub templated_slice: Slice,
pub templated_file: Arc<TemplatedFile>,
pub working_line_no: usize,
pub working_line_pos: usize,
}
impl PositionMarker {
#[must_use]
pub fn new(
source_slice: Slice,
templated_slice: Slice,
templated_file: &Arc<TemplatedFile>,
working_line_no: Option<usize>,
working_line_pos: Option<usize>,
) -> Self {
let (working_line_no, working_line_pos) = match (working_line_no, working_line_pos) {
(Some(working_line_no), Some(working_line_pos)) => (working_line_no, working_line_pos),
_ => templated_file.get_line_pos_of_char_pos(source_slice.start, false),
};
Self {
source_slice,
templated_slice,
templated_file: Arc::clone(templated_file),
working_line_no,
working_line_pos,
}
}
#[must_use]
pub fn working_loc(&self) -> (usize, usize) {
(self.working_line_no, self.working_line_pos)
}
#[must_use]
pub fn working_loc_after(&self, raw: &str) -> (usize, usize) {
// Infer next position based on the raw string
self.infer_next_position(raw, self.working_line_no, self.working_line_pos)
}
#[must_use]
pub fn infer_next_position(
&self,
raw: &str,
line_no: usize,
line_pos: usize,
) -> (usize, usize) {
// Infer the next position from the raw string: multi-line input moves
// the location to the end of its last line, while single-line input
// just advances the column by the string length.
let lines: Vec<&str> = raw.split('\n').collect();
if lines.len() > 1 {
let num_lines: usize = lines.len();
let last_line_len: usize = lines.last().unwrap().len();
(line_no + num_lines - 1, last_line_len + 1)
} else {
let first_line_len: usize = raw.len();
(line_no, line_pos + first_line_len)
}
}
#[must_use]
pub fn source_position(&self) -> (usize, usize) {
self.templated_file
.get_line_pos_of_char_pos(self.source_slice.start, true)
}
#[must_use]
pub fn templated_position(&self) -> (usize, usize) {
self.templated_file
.get_line_pos_of_char_pos(self.source_slice.start, false)
}
#[must_use]
pub fn line_no(&self) -> usize {
self.source_position().0
}
#[must_use]
pub fn line_pos(&self) -> usize {
self.source_position().1
}
#[must_use]
pub fn to_source_string(&self) -> String {
let (line, pos) = self.source_position();
format!("[L:{line:3}, P:{pos:3}]")
}
#[must_use]
pub fn start_point_marker(&self) -> Self {
PositionMarker::from_point(
self.source_slice.start,
self.templated_slice.start,
&self.templated_file,
Some(self.working_line_no),
Some(self.working_line_pos),
)
}
#[must_use]
pub fn end_point_marker(&self) -> Self {
PositionMarker::from_point(
self.source_slice.stop,
self.templated_slice.stop,
&self.templated_file,
None,
None,
)
}
#[must_use]
pub fn is_point(&self) -> bool {
slice_is_point(&self.source_slice) && slice_is_point(&self.templated_slice)
}
#[must_use]
pub fn with_working_position(&self, line_no: usize, line_pos: usize) -> Self {
PositionMarker {
working_line_no: line_no,
working_line_pos: line_pos,
..self.clone()
}
}
#[must_use]
pub fn is_literal(&self) -> bool {
self.templated_file
.is_source_slice_literal(&self.source_slice)
}
#[must_use]
pub fn source_str(&self) -> String {
self.templated_file
.source_str
.chars()
.skip(self.source_slice.start)
.take(self.source_slice.len())
.collect::<String>()
}
#[must_use]
pub fn to_source_dict(&self) -> HashMap<String, usize> {
self.templated_file
.source_position_dict_from_slice(&self.source_slice)
}
#[must_use]
pub fn from_point(
source_point: usize,
templated_point: usize,
templated_file: &Arc<TemplatedFile>,
working_line_no: Option<usize>,
working_line_pos: Option<usize>,
) -> Self {
let source_slice = Slice::from(source_point..source_point);
let templated_slice = Slice::from(templated_point..templated_point);
PositionMarker::new(
source_slice,
templated_slice,
templated_file,
working_line_no,
working_line_pos,
)
}
#[must_use]
pub fn from_points(start_marker: &PositionMarker, end_marker: &PositionMarker) -> Self {
if start_marker.templated_file != end_marker.templated_file {
panic!("Markers must refer to the same templated file.");
}
PositionMarker::new(
start_marker.source_slice,
start_marker.templated_slice,
&start_marker.templated_file,
Some(start_marker.working_line_no),
Some(start_marker.working_line_pos),
)
}
#[must_use]
pub fn from_child_markers(markers: &[Option<PositionMarker>]) -> Self {
let mut source_start = usize::MAX;
let mut source_stop = usize::MIN;
let mut templated_start = usize::MAX;
let mut templated_stop = usize::MIN;
let mut templated_file = None;
for marker in markers.iter().filter_map(|m| m.as_ref()) {
source_start = source_start.min(marker.source_slice.start);
source_stop = source_stop.max(marker.source_slice.stop);
templated_start = templated_start.min(marker.templated_slice.start);
templated_stop = templated_stop.max(marker.templated_slice.stop);
if templated_file.is_none() {
templated_file = Some(marker.templated_file.clone());
}
if templated_file.as_ref() != Some(&marker.templated_file) {
panic!("Markers must refer to the same templated file.");
}
}
let source_slice = Slice::from(source_start..source_stop);
let templated_slice = Slice::from(templated_start..templated_stop);
PositionMarker::new(
source_slice,
templated_slice,
&templated_file.unwrap(),
None,
None,
)
}
}
impl Eq for PositionMarker {}
impl PartialEq for PositionMarker {
fn eq(&self, other: &Self) -> bool {
self.working_loc() == other.working_loc()
}
}
impl PartialOrd for PositionMarker {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for PositionMarker {
fn cmp(&self, other: &Self) -> Ordering {
self.working_loc().cmp(&other.working_loc())
}
}
impl Display for PositionMarker {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.to_source_string())
}
}
#[must_use]
pub fn slice_is_point(test_slice: &Slice) -> bool {
test_slice.start == test_slice.stop
}
#[cfg(feature = "python")]
pub mod python {
use std::{fmt::Display, sync::Arc};
use hashbrown::HashMap;
use pyo3::{prelude::*, types::PyType};
use crate::{
slice::Slice,
templater::templatefile::{
python::{PySqlFluffTemplatedFile, PyTemplatedFile},
TemplatedFile,
},
};
use super::PositionMarker;
#[pyclass(name = "RsPositionMarker", str, eq, ord, frozen, module = "sqlfluffrs")]
#[repr(transparent)]
#[derive(Debug, Clone)]
pub struct PyPositionMarker(pub PositionMarker);
#[pymethods]
impl PyPositionMarker {
#[getter]
pub fn source_slice(&self) -> Slice {
self.0.source_slice
}
#[getter]
pub fn templated_slice(&self) -> Slice {
self.0.templated_slice
}
// #[getter]
// pub fn templated_file(&self) -> PySqlFluffTemplatedFile {
// dbg!("templated file from PositionMarker");
// PySqlFluffTemplatedFile(PyTemplatedFile::from(self.0.templated_file.clone()))
// }
#[getter]
pub fn templated_file(&self) -> PyTemplatedFile {
PyTemplatedFile(self.0.templated_file.clone())
}
#[getter]
pub fn working_line_no(&self) -> usize {
self.0.working_line_no
}
#[getter]
pub fn working_line_pos(&self) -> usize {
self.0.working_line_pos
}
#[getter]
pub fn working_loc(&self) -> (usize, usize) {
(self.0.working_line_no, self.0.working_line_pos)
}
pub fn start_point_marker(&self) -> Self {
Self(self.0.start_point_marker())
}
pub fn end_point_marker(&self) -> Self {
Self(self.0.end_point_marker())
}
pub fn source_position(&self) -> (usize, usize) {
self.0.source_position()
}
pub fn templated_position(&self) -> (usize, usize) {
self.0.templated_position()
}
pub fn is_literal(&self) -> bool {
self.0.is_literal()
}
pub fn with_working_position(&self, line_no: usize, line_pos: usize) -> Self {
Self(self.0.with_working_position(line_no, line_pos))
}
pub fn infer_next_position(
&self,
raw: &str,
line_no: usize,
line_pos: usize,
) -> (usize, usize) {
self.0.infer_next_position(raw, line_no, line_pos)
}
pub fn line_no(&self) -> usize {
self.0.line_no()
}
pub fn line_pos(&self) -> usize {
self.0.line_pos()
}
pub fn source_str(&self) -> String {
self.0.source_str()
}
pub fn to_source_dict(&self) -> HashMap<String, usize> {
self.0.to_source_dict()
}
#[classmethod]
#[pyo3(signature = (markers))]
pub fn from_child_markers(
_cls: &Bound<'_, PyType>,
markers: Vec<Option<PyPositionMarker>>,
) -> PyResult<Self> {
let rust_markers: Vec<Option<PositionMarker>> =
markers.into_iter().map(|m| m.map(Into::into)).collect();
Ok(Self(PositionMarker::from_child_markers(&rust_markers)))
}
#[classmethod]
pub fn from_point(
_cls: &Bound<'_, PyType>,
source_point: usize,
templated_point: usize,
templated_file: PySqlFluffTemplatedFile,
working_line_no: Option<usize>,
working_line_pos: Option<usize>,
) -> Self {
let templated_file = templated_file.0 .0;
Self(PositionMarker::from_point(
source_point,
templated_point,
&templated_file,
working_line_no,
working_line_pos,
))
}
#[classmethod]
pub fn from_points(
_cls: &Bound<'_, PyType>,
start_marker: &PyPositionMarker,
end_marker: &PyPositionMarker,
) -> Self {
Self(PositionMarker::from_points(&start_marker.0, &end_marker.0))
}
pub fn is_point(&self) -> bool {
self.0.is_point()
}
pub fn to_source_string(&self) -> String {
self.0.to_source_string()
}
}
impl Display for PyPositionMarker {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0.to_source_string())
}
}
impl From<PyPositionMarker> for PySqlFluffTemplatedFile {
fn from(value: PyPositionMarker) -> Self {
PySqlFluffTemplatedFile(PyTemplatedFile::from(value.0.templated_file.clone()))
}
}
impl From<PyPositionMarker> for PositionMarker {
fn from(value: PyPositionMarker) -> Self {
value.0
}
}
impl From<PositionMarker> for PyPositionMarker {
fn from(value: PositionMarker) -> Self {
Self(value)
}
}
impl PartialEq for PyPositionMarker {
fn eq(&self, other: &Self) -> bool {
self.0.eq(&other.0)
}
}
impl PartialOrd for PyPositionMarker {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.0.partial_cmp(&other.0)
}
}
#[derive(Clone, IntoPyObject, Debug)]
pub struct PySqlFluffPositionMarker(pub PyPositionMarker);
impl<'py> FromPyObject<'py> for PySqlFluffPositionMarker {
fn extract_bound(obj: &pyo3::Bound<'py, pyo3::PyAny>) -> PyResult<Self> {
let source_slice = obj.getattr("source_slice")?.extract::<Slice>()?;
// dbg!(source_slice);
let templated_slice = obj.getattr("templated_slice")?.extract::<Slice>()?;
// dbg!(templated_slice);
let templated_file: Arc<TemplatedFile> = obj
.getattr("templated_file")?
.extract::<PySqlFluffTemplatedFile>()?
.into();
// dbg!(templated_file.clone());
// let working_line_no = obj.getattr("working_line_no")?.extract::<usize>()?;
// let working_line_pos = obj.getattr("working_line_pos")?.extract::<usize>()?;
Ok(Self(PyPositionMarker(PositionMarker::new(
source_slice,
templated_slice,
&templated_file,
None,
None,
))))
}
}
impl From<PySqlFluffPositionMarker> for PyPositionMarker {
fn from(value: PySqlFluffPositionMarker) -> Self {
value.0
}
}
impl From<PySqlFluffPositionMarker> for PositionMarker {
fn from(value: PySqlFluffPositionMarker) -> Self {
value.0 .0
}
}
}
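
As a quick worked example of the position arithmetic above (a sketch, not part of the commit): consuming a multi-line raw string moves the working location to the last line, while a single-line string only advances the column.

use sqlfluffrs::marker::PositionMarker;

// Assumes an existing marker; only infer_next_position is exercised here.
fn demo_infer_next_position(marker: &PositionMarker) {
    // "foo\nbar" spans two lines, so from (line 3, pos 5) we land on
    // line 4, position 4 (three chars on the new line, 1-indexed).
    assert_eq!(marker.infer_next_position("foo\nbar", 3, 5), (4, 4));
    // A single-line string just advances the column by its length.
    assert_eq!(marker.infer_next_position("baz", 3, 5), (3, 8));
}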

482
sqlfluffrs/src/matcher.rs Normal file
View File

@@ -0,0 +1,482 @@
use std::fmt::Display;
use fancy_regex::{Regex as FancyRegex, RegexBuilder as FancyRegexBuilder};
use hashbrown::HashSet;
use regex::{Regex, RegexBuilder};
use crate::{
dialect::Dialect,
marker::PositionMarker,
regex::RegexModeGroup,
token::Token,
};
/// Legacy function pointer type for token generation (maintains backward compatibility)
/// This signature accepts individual parameters and constructs a TokenConfig internally
pub type TokenGenerator = fn(
String, // raw
PositionMarker, // pos_marker
HashSet<String>, // class_types
Vec<String>, // instance_types
Option<Vec<String>>, // trim_start
Option<Vec<String>>, // trim_chars
Option<(String, RegexModeGroup)>, // quoted_value
Option<(String, String)>, // escape_replacement
Option<fn(&str) -> String>, // casefold
) -> Token;
#[derive(Debug, Clone)]
pub enum LexerMode {
String(String), // Match a literal string
Regex(Regex, fn(&str) -> bool), // Match using a regex
FancyRegex(FancyRegex, fn(&str) -> bool), // Match using a regex
Function(fn(&str, Dialect) -> Option<&str>),
}
impl Display for LexerMode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match *self {
LexerMode::Regex(_, _) => write!(f, "RegexMatcher"),
LexerMode::FancyRegex(_, _) => write!(f, "FancyRegexMatcher"),
LexerMode::String(_) => write!(f, "StringMatcher"),
LexerMode::Function(_) => write!(f, "FunctionMatcher"),
}
}
}
pub struct LexedElement<'a> {
pub raw: &'a str,
pub matcher: &'a LexMatcher,
}
impl<'a> LexedElement<'a> {
pub fn new(raw: &'a str, matcher: &'a LexMatcher) -> Self {
Self { raw, matcher }
}
}
#[derive(Debug, Clone)]
pub struct LexMatcher {
pub dialect: Dialect,
pub name: String,
pub mode: LexerMode,
pub token_class_func: TokenGenerator,
pub subdivider: Option<Box<LexMatcher>>,
pub trim_post_subdivide: Option<Box<LexMatcher>>,
pub trim_start: Option<Vec<String>>,
pub trim_chars: Option<Vec<String>>,
pub quoted_value: Option<(String, RegexModeGroup)>,
pub escape_replacements: Option<(String, String)>,
pub casefold: Option<fn(&str) -> String>,
pub kwarg_type: Option<String>,
}
impl Display for LexMatcher {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "<{}: {}>", self.mode, self.name)
}
}
impl LexMatcher {
pub fn string_lexer(
dialect: Dialect,
name: &str,
template: &str,
token_class_func: TokenGenerator,
subdivider: Option<Box<LexMatcher>>,
trim_post_subdivide: Option<Box<LexMatcher>>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacements: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
kwarg_type: Option<String>,
) -> Self {
Self {
dialect,
name: name.to_string(),
mode: LexerMode::String(template.to_string()),
token_class_func,
subdivider,
trim_post_subdivide,
trim_start,
trim_chars,
quoted_value,
escape_replacements,
casefold,
kwarg_type,
}
}
fn base_regex_lexer(
dialect: Dialect,
name: &str,
pattern: &str,
token_class_func: TokenGenerator,
subdivider: Option<Box<LexMatcher>>,
trim_post_subdivide: Option<Box<LexMatcher>>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacements: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
fallback_lexer: Option<fn(&str, Dialect) -> Option<&str>>,
precheck: fn(&str) -> bool,
kwarg_type: Option<String>,
) -> Self {
let mode = match RegexBuilder::new(&pattern).build() {
Ok(regex) => LexerMode::Regex(regex, precheck),
Err(_) => match FancyRegexBuilder::new(&pattern).build() {
Ok(regex) => LexerMode::FancyRegex(regex, precheck),
Err(_) => {
if let Some(fallback) = fallback_lexer {
LexerMode::Function(fallback)
} else {
panic!(
"Unable to compile regex {} and no fallback function provided",
pattern
)
}
}
},
};
Self {
dialect,
name: name.to_string(),
mode,
token_class_func,
subdivider,
trim_post_subdivide,
trim_start,
trim_chars,
quoted_value,
escape_replacements,
casefold,
kwarg_type,
}
}
pub fn regex_lexer(
dialect: Dialect,
name: &str,
template: &str,
token_class_func: TokenGenerator,
subdivider: Option<Box<LexMatcher>>,
trim_post_subdivide: Option<Box<LexMatcher>>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacements: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
fallback_lexer: Option<fn(&str, Dialect) -> Option<&str>>,
precheck: fn(&str) -> bool,
kwarg_type: Option<String>,
) -> Self {
let pattern = format!(r"(?s)\A(?:{})", template);
Self::base_regex_lexer(
dialect,
name,
&pattern,
token_class_func,
subdivider,
trim_post_subdivide,
trim_start,
trim_chars,
quoted_value,
escape_replacements,
casefold,
fallback_lexer,
precheck,
kwarg_type,
)
}
pub fn regex_subdivider(
dialect: Dialect,
name: &str,
template: &str,
token_class_func: TokenGenerator,
subdivider: Option<Box<LexMatcher>>,
trim_post_subdivide: Option<Box<LexMatcher>>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacements: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
fallback_lexer: Option<fn(&str, Dialect) -> Option<&str>>,
precheck: fn(&str) -> bool,
kwarg_type: Option<String>,
) -> Self {
let pattern = format!(r"(?:{})", template);
Self::base_regex_lexer(
dialect,
name,
&pattern,
token_class_func,
subdivider,
trim_post_subdivide,
trim_start,
trim_chars,
quoted_value,
escape_replacements,
casefold,
fallback_lexer,
precheck,
kwarg_type,
)
}
pub fn scan_match<'a>(&'a self, input: &'a str) -> Option<(Vec<LexedElement<'a>>, usize)> {
// let t = Instant::now();
if input.is_empty() {
panic!("Unexpected empty string!");
}
// Match based on the mode
let matched = match &self.mode {
LexerMode::String(template) => input
.starts_with(template)
.then(|| LexedElement::new(template, self)),
LexerMode::Regex(regex, is_match_valid) => {
if !(is_match_valid)(input) {
// println!("{},{}", self.name, t.elapsed().as_nanos());
return None;
}
regex
.find(input)
.map(|mat| LexedElement::new(mat.as_str(), self))
}
LexerMode::FancyRegex(regex, is_match_valid) => {
if !(is_match_valid)(input) {
// println!("{},{}", self.name, t.elapsed().as_nanos());
return None;
}
regex
.find(input)
.ok()
.flatten()
.map(|mat| LexedElement::new(mat.as_str(), self))
}
LexerMode::Function(function) => {
(function)(input, self.dialect).map(|s| LexedElement::new(s, self))
}
};
// println!("{},{}", self.name, t.elapsed().as_nanos());
// Handle subdivision and trimming
if let Some(matched) = matched {
let len = matched.raw.len();
let elements = self.subdivide(matched);
Some((elements, len))
} else {
None
}
}
fn search(&self, input: &str) -> Option<(usize, usize)> {
match &self.mode {
LexerMode::String(template) => input.find(template).map(|start| {
let end = start + template.len();
(start, end)
}),
LexerMode::Regex(regex, _) => regex.find(input).map(|mat| (mat.start(), mat.end())),
LexerMode::FancyRegex(regex, _) => regex
.find(input)
.ok()
.flatten()
.map(|mat| (mat.start(), mat.end())),
_ => todo!(),
}
}
fn subdivide<'a>(&'a self, matched: LexedElement<'a>) -> Vec<LexedElement<'a>> {
if let Some(subdivider) = &self.subdivider {
let mut elements = Vec::new();
let mut buffer = matched.raw;
while !buffer.is_empty() {
if let Some((start, end)) = subdivider.search(buffer) {
let trimmed_elems = self.trim_match(&buffer[..start]);
elements.extend(trimmed_elems);
elements.push(LexedElement {
raw: &buffer[start..end],
matcher: subdivider,
});
buffer = &buffer[end..];
} else {
let trimmed_elems = self.trim_match(buffer);
elements.extend(trimmed_elems);
break;
}
}
elements
} else {
vec![matched]
}
}
fn trim_match<'a>(&'a self, raw: &'a str) -> Vec<LexedElement<'a>> {
let mut elements = Vec::new();
let mut buffer = raw;
let mut content_buffer = 0..0;
if let Some(trim_post_subdivide) = &self.trim_post_subdivide {
while !buffer.is_empty() {
if let Some((start, end)) = trim_post_subdivide.search(buffer) {
if start == 0 {
// Starting match
elements.push(LexedElement {
raw: &buffer[..end],
matcher: trim_post_subdivide,
});
buffer = &buffer[end..];
content_buffer = end..end;
} else if end == buffer.len() {
elements.push(LexedElement {
raw: &raw[content_buffer.start..content_buffer.end + start],
matcher: self,
});
elements.push(LexedElement {
raw: &buffer[start..end],
matcher: trim_post_subdivide,
});
return elements;
} else {
content_buffer.end += end;
buffer = &buffer[end..];
}
} else {
break;
}
}
}
if !content_buffer.is_empty() || !buffer.is_empty() {
elements.push(LexedElement::new(&raw[content_buffer.start..], self));
}
elements
}
pub fn construct_token(&self, raw: &str, pos_marker: PositionMarker) -> Token {
let instance_types = match self.kwarg_type.clone() {
Some(t) => vec![t],
None => vec![self.name.clone()],
};
(self.token_class_func)(
raw.to_string(),
pos_marker,
HashSet::new(),
instance_types,
self.trim_start.clone(),
self.trim_chars.clone(),
self.quoted_value.clone(),
self.escape_replacements.clone(),
self.casefold,
)
}
}
pub fn extract_nested_block_comment(input: &str, dialect: Dialect) -> Option<&str> {
let mut chars = input.chars().peekable();
let mut comment = String::new();
// Ensure the input starts with "/*"
if chars.next() != Some('/') || chars.next() != Some('*') {
return None;
}
comment.push_str("/*"); // Add the opening delimiter
let mut depth = 1; // Track nesting level
while let Some(c) = chars.next() {
comment.push(c);
if c == '/' && chars.peek() == Some(&'*') {
chars.next(); // Consume '*'
comment.push('*');
depth += 1;
} else if c == '*' && chars.peek() == Some(&'/') {
chars.next(); // Consume '/'
comment.push('/');
depth -= 1;
if depth == 0 {
return Some(&input[..comment.len()]);
}
}
}
// If we reach here, the comment wasn't properly closed
match &dialect {
Dialect::Sqlite => Some(&input[..comment.len()]),
_ => None,
}
}
// TODO: implement python passthroughs
#[cfg(feature = "python")]
pub mod python {}
#[cfg(test)]
mod test {
use crate::{dialect::Dialect, token::Token};
use super::{extract_nested_block_comment, LexMatcher};
#[test]
fn test_subdivide() {
let block_comment_matcher = LexMatcher::regex_lexer(
Dialect::Ansi,
"block_comment",
r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
Token::comment_token_compat,
Some(Box::new(LexMatcher::regex_subdivider(
Dialect::Ansi,
"newline",
r#"\r\n|\n"#,
Token::newline_token_compat,
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
Some(Box::new(LexMatcher::regex_subdivider(
Dialect::Ansi,
"whitespace",
r#"[^\S\r\n]+"#,
Token::whitespace_token_compat,
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
))),
None,
None,
None,
None,
None,
Some(extract_nested_block_comment),
|input| input.starts_with("/"),
None,
);
let (elems, _) = block_comment_matcher
.scan_match("/*\n)\n*/")
.expect("should match");
for elem in elems {
println!("{}: {}", elem.matcher.name, elem.raw);
}
}
}
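
A short illustration of the nested block comment fallback above, in the same spirit as the test module (the literal strings are only examples): nesting bumps the depth counter until the outermost */ closes it, and unterminated comments only match for SQLite, which tolerates a trailing open comment.

use sqlfluffrs::dialect::Dialect;
use sqlfluffrs::matcher::extract_nested_block_comment;

fn demo_nested_block_comment() {
    let nested = "/* outer /* inner */ still outer */ trailing";
    assert_eq!(
        extract_nested_block_comment(nested, Dialect::Ansi),
        Some("/* outer /* inner */ still outer */"),
    );
    // Unterminated comments are rejected for most dialects...
    assert_eq!(extract_nested_block_comment("/* open", Dialect::Ansi), None);
    // ...but accepted as-is for SQLite.
    assert_eq!(
        extract_nested_block_comment("/* open", Dialect::Sqlite),
        Some("/* open"),
    );
}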

25
sqlfluffrs/src/python.rs Normal file
View File

@@ -0,0 +1,25 @@
use crate::lexer::python::{PyLexer, PySQLLexError};
use crate::marker::python::PyPositionMarker;
use crate::templater::{
fileslice::python::{PyRawFileSlice, PyTemplatedFileSlice},
templatefile::python::PyTemplatedFile,
};
use crate::token::python::PyToken;
use pyo3::prelude::*;
/// A Python module implemented in Rust.
#[pymodule(name = "sqlfluffrs", module = "sqlfluffrs")]
fn sqlfluffrs(m: &Bound<'_, PyModule>) -> PyResult<()> {
let env = env_logger::Env::default().filter_or("RUST_LOG", "warn");
env_logger::Builder::from_env(env)
.try_init()
.unwrap_or_else(|_| log::warn!("env_logger already initialized!"));
m.add_class::<PyToken>()?;
m.add_class::<PyTemplatedFile>()?;
m.add_class::<PyTemplatedFileSlice>()?;
m.add_class::<PyRawFileSlice>()?;
m.add_class::<PySQLLexError>()?;
m.add_class::<PyLexer>()?;
m.add_class::<PyPositionMarker>()?;
Ok(())
}

85
sqlfluffrs/src/regex.rs Normal file
View File

@@ -0,0 +1,85 @@
use std::fmt::Display;
use fancy_regex::{Regex as FancyRegex, RegexBuilder as FancyRegexBuilder};
#[cfg(feature = "python")]
use pyo3::pyclass;
use regex::{Regex, RegexBuilder};
#[cfg_attr(feature = "python", pyclass)]
#[derive(Debug, Clone)]
pub enum RegexModeGroup {
Index(usize),
Name(String),
}
impl From<usize> for RegexModeGroup {
fn from(idx: usize) -> Self {
RegexModeGroup::Index(idx)
}
}
impl From<&str> for RegexModeGroup {
fn from(name: &str) -> Self {
RegexModeGroup::Name(name.to_string())
}
}
impl From<String> for RegexModeGroup {
fn from(name: String) -> Self {
RegexModeGroup::Name(name)
}
}
#[derive(Debug, Clone)]
pub enum RegexMode {
Regex(Regex), // Match using a regex
FancyRegex(FancyRegex), // Match using a regex
}
impl RegexMode {
pub fn new(pattern: &str) -> Self {
// Try to compile with the standard regex first
if let Ok(re) = RegexBuilder::new(pattern).build() {
RegexMode::Regex(re)
} else if let Ok(re) = FancyRegexBuilder::new(pattern).build() {
RegexMode::FancyRegex(re)
} else {
panic!("Invalid regex pattern: {}", pattern);
}
}
pub fn capture(&self, group: impl Into<RegexModeGroup>, text: &str) -> Option<String> {
match self {
RegexMode::Regex(re) => {
let caps = re.captures(text)?;
match group.into() {
RegexModeGroup::Index(idx) => caps.get(idx).map(|m| m.as_str().to_string()),
RegexModeGroup::Name(name) => caps.name(&name).map(|m| m.as_str().to_string()),
}
}
RegexMode::FancyRegex(re) => {
let caps = re.captures(text).ok()??;
match group.into() {
RegexModeGroup::Index(idx) => caps.get(idx).map(|m| m.as_str().to_string()),
RegexModeGroup::Name(name) => caps.name(&name).map(|m| m.as_str().to_string()),
}
}
}
}
pub fn replace_all(&self, text: &str, replacement: &str) -> String {
match self {
RegexMode::Regex(re) => re.replace_all(text, replacement).to_string(),
RegexMode::FancyRegex(re) => re.replace_all(text, replacement).to_string(),
}
}
}
impl Display for RegexMode {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match *self {
RegexMode::Regex(_) => write!(f, "Regex"),
RegexMode::FancyRegex(_) => write!(f, "FancyRegex"),
}
}
}
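
Tying this back to the generated matchers: the quoted_value pattern extracts the inner value of a quoted literal and the escape_replacements pair collapses doubled or escaped quotes, values which are later consumed when tokens normalise their raw value (see the Token::normalize call in the token constructors further down). A sketch using the Trino single_quote values from earlier; the demo function itself is illustrative:

use sqlfluffrs::regex::{RegexMode, RegexModeGroup};

fn demo_quote_normalisation() {
    // Pattern and replacement pair taken from the Trino single_quote matcher.
    let quoted = RegexMode::new(r#"'((?:[^'\\]|\\.|'')*)'"#);
    let inner = quoted.capture(RegexModeGroup::Index(1), "'it''s'").unwrap();
    assert_eq!(inner, "it''s");

    let escapes = RegexMode::new(r#"\\'|''"#);
    assert_eq!(escapes.replace_all(&inner, "'"), "it's");
}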

67
sqlfluffrs/src/slice.rs Normal file
View File

@@ -0,0 +1,67 @@
use std::{fmt::Display, ops::Range};
use serde::{Deserialize, Serialize};
#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy, Serialize, Deserialize)]
pub struct Slice {
pub start: usize,
pub stop: usize,
}
impl From<Range<usize>> for Slice {
fn from(value: Range<usize>) -> Self {
Self {
start: value.start,
stop: value.end,
}
}
}
impl Slice {
pub fn slice_is_point(test_slice: &Range<usize>) -> bool {
test_slice.start == test_slice.end
}
pub fn len(&self) -> usize {
self.stop - self.start
}
pub fn is_empty(&self) -> bool {
self.len() == 0
}
}
impl Display for Slice {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "slice({}, {}, None)", self.start, self.stop)
}
}
#[cfg(feature = "python")]
pub mod python {
use super::Slice;
use pyo3::{prelude::*, types::PySlice};
impl<'py> FromPyObject<'py> for Slice {
fn extract_bound(obj: &pyo3::Bound<'py, pyo3::PyAny>) -> PyResult<Self> {
let start = obj.getattr("start")?.extract::<usize>()?;
let stop = obj.getattr("stop")?.extract::<usize>()?;
Ok(Slice { start, stop })
}
}
impl<'py> IntoPyObject<'py> for Slice {
type Target = PySlice; // the Python type
type Output = Bound<'py, Self::Target>; // in most cases this will be `Bound`
type Error = PyErr; // the conversion error type, has to be convertible to `PyErr`
fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
Ok(PySlice::new(
py,
self.start.try_into()?,
self.stop.try_into()?,
1,
))
}
}
}
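
For reference, the Range conversion and the point/length helpers behave as follows (illustrative values):

use sqlfluffrs::slice::Slice;

fn demo_slice() {
    let s = Slice::from(3..7);
    assert_eq!((s.start, s.stop), (3, 7));
    assert_eq!(s.len(), 4);
    assert!(!s.is_empty());
    // A zero-width range is a "point", mirroring slice_is_point in marker.rs.
    assert!(Slice::slice_is_point(&(5..5)));
}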

View File

@@ -0,0 +1,323 @@
use serde::{Deserialize, Serialize};
use crate::slice::Slice;
#[derive(Debug, PartialEq, Clone, Hash, Serialize, Deserialize)]
pub struct RawFileSlice {
pub raw: String, // Source string
pub slice_type: String,
pub source_idx: usize, // Offset from beginning of source string
// Block index, incremented on start or end block tags, e.g. "if", "for".
// This is used in `BaseRule.discard_unsafe_fixes()` to reject any fixes
// which span multiple templated blocks.
pub block_idx: usize,
// The command of a templated tag, e.g. "if", "for"
// This is used in template tracing as a kind of cache to identify the kind
// of template element this is without having to re-extract it each time.
pub tag: Option<String>,
}
impl RawFileSlice {
pub fn new(
raw: String,
slice_type: String,
source_idx: usize,
block_idx: Option<usize>,
tag: Option<String>,
) -> Self {
RawFileSlice {
raw,
slice_type,
source_idx,
block_idx: block_idx.unwrap_or_default(),
tag,
}
}
pub fn end_source_idx(&self) -> usize {
// Return the closing index of this slice.
let len: usize = self.raw.chars().count();
self.source_idx + len
}
pub fn source_slice(&self) -> Slice {
Slice::from(self.source_idx..self.end_source_idx())
}
pub fn is_source_only_slice(&self) -> bool {
// Based on its slice_type, does it only appear in the *source*?
// There are some slice types which are automatically source only.
// There are *also* some which are source only because they render
// to an empty string.
// TODO: should any new logic go here?
matches!(
self.slice_type.as_str(),
"comment" | "block_end" | "block_start" | "block_mid"
)
}
}
#[derive(Debug, PartialEq, Clone, Hash, Serialize, Deserialize)]
pub struct TemplatedFileSlice {
pub slice_type: String,
pub source_codepoint_slice: Slice,
pub templated_codepoint_slice: Slice,
}
impl TemplatedFileSlice {
pub fn new(
slice_type: String,
source_codepoint_slice: Slice,
templated_codepoint_slice: Slice,
) -> Self {
TemplatedFileSlice {
slice_type,
source_codepoint_slice,
templated_codepoint_slice,
}
}
}
#[cfg(feature = "python")]
pub mod python {
use bincode;
use pyo3::{prelude::*, types::PyBytes};
use serde::{Deserialize, Serialize};
use crate::slice::Slice;
use super::{RawFileSlice, TemplatedFileSlice};
#[pyclass(name = "RsRawFileSlice", module = "sqlfluffrs")]
#[repr(transparent)]
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct PyRawFileSlice(pub(crate) RawFileSlice);
#[pymethods]
impl PyRawFileSlice {
#[new]
#[pyo3(signature = (raw, slice_type, source_idx, block_idx=0, tag=None))]
pub fn new(
raw: String,
slice_type: String,
source_idx: usize,
block_idx: Option<usize>,
tag: Option<String>,
) -> Self {
Self(RawFileSlice::new(
raw, slice_type, source_idx, block_idx, tag,
))
}
pub fn __setstate__(&mut self, state: Bound<'_, PyBytes>) -> PyResult<()> {
*self = bincode::deserialize(state.as_bytes()).map_err(|e| {
PyErr::new::<pyo3::exceptions::PyException, _>(format!(
"Deserialization error: {}",
e
))
})?;
Ok(())
}
pub fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
let bytes = bincode::serialize(&self.0).map_err(|e| {
PyErr::new::<pyo3::exceptions::PyException, _>(format!(
"Serialization error: {}",
e
))
})?;
Ok(PyBytes::new(py, &bytes))
}
pub fn __getnewargs__(&self) -> PyResult<(String, String, usize, usize, Option<String>)> {
Ok((
self.raw(),
self.slice_type(),
self.source_idx(),
self.block_idx(),
self.tag(),
))
}
#[getter]
pub fn raw(&self) -> String {
self.0.raw.clone()
}
#[getter]
pub fn slice_type(&self) -> String {
self.0.slice_type.clone()
}
#[getter]
pub fn source_idx(&self) -> usize {
self.0.source_idx
}
#[getter]
pub fn block_idx(&self) -> usize {
self.0.block_idx
}
#[getter]
pub fn tag(&self) -> Option<String> {
self.0.tag.clone()
}
}
impl From<PyRawFileSlice> for RawFileSlice {
fn from(value: PyRawFileSlice) -> Self {
value.0
}
}
impl From<RawFileSlice> for PyRawFileSlice {
fn from(value: RawFileSlice) -> Self {
Self(value)
}
}
#[pyclass(name = "RsTemplatedFileSlice", module = "sqlfluffrs")]
#[repr(transparent)]
#[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
pub struct PyTemplatedFileSlice(pub(crate) TemplatedFileSlice);
#[pymethods]
impl PyTemplatedFileSlice {
#[new]
fn new(
slice_type: String,
source_codepoint_slice: Slice,
templated_codepoint_slice: Slice,
) -> Self {
Self(TemplatedFileSlice::new(
slice_type,
source_codepoint_slice,
templated_codepoint_slice,
))
}
pub fn __setstate__(&mut self, state: Bound<'_, PyBytes>) -> PyResult<()> {
*self = bincode::deserialize(state.as_bytes()).map_err(|e| {
PyErr::new::<pyo3::exceptions::PyException, _>(format!(
"Deserialization error: {}",
e
))
})?;
Ok(())
}
pub fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
let bytes = bincode::serialize(&self.0).map_err(|e| {
PyErr::new::<pyo3::exceptions::PyException, _>(format!(
"Serialization error: {}",
e
))
})?;
Ok(PyBytes::new(py, &bytes))
}
pub fn __getnewargs__(&self) -> PyResult<(String, Slice, Slice)> {
Ok((
self.0.slice_type.clone(),
self.0.source_codepoint_slice,
self.0.templated_codepoint_slice,
))
}
#[getter]
fn slice_type(&self) -> PyResult<String> {
Ok(self.0.slice_type.clone())
}
#[getter]
fn source_slice(&self) -> PyResult<Slice> {
Ok(self.0.source_codepoint_slice)
}
#[getter]
fn templated_slice(&self) -> PyResult<Slice> {
Ok(self.0.templated_codepoint_slice)
}
}
impl From<PyTemplatedFileSlice> for TemplatedFileSlice {
fn from(value: PyTemplatedFileSlice) -> Self {
value.0
}
}
impl From<TemplatedFileSlice> for PyTemplatedFileSlice {
fn from(value: TemplatedFileSlice) -> Self {
Self(value)
}
}
pub mod sqlfluff {
use pyo3::prelude::*;
use crate::{
slice::Slice,
templater::fileslice::{RawFileSlice, TemplatedFileSlice},
};
use super::{PyRawFileSlice, PyTemplatedFileSlice};
#[derive(Clone, IntoPyObject)]
pub struct PySqlFluffTemplatedFileSlice(pub PyTemplatedFileSlice);
impl<'py> FromPyObject<'py> for PySqlFluffTemplatedFileSlice {
fn extract_bound(obj: &pyo3::Bound<'py, pyo3::PyAny>) -> PyResult<Self> {
let slice_type = obj.getattr("slice_type")?.extract::<String>()?;
let source_slice = obj.getattr("source_slice")?.extract::<Slice>()?;
let templated_slice = obj.getattr("templated_slice")?.extract::<Slice>()?;
Ok(Self(PyTemplatedFileSlice(TemplatedFileSlice::new(
slice_type,
source_slice,
templated_slice,
))))
}
}
impl From<PySqlFluffTemplatedFileSlice> for PyTemplatedFileSlice {
fn from(value: PySqlFluffTemplatedFileSlice) -> Self {
value.0
}
}
impl From<PySqlFluffTemplatedFileSlice> for TemplatedFileSlice {
fn from(value: PySqlFluffTemplatedFileSlice) -> Self {
value.0 .0
}
}
#[derive(Clone)]
pub struct PySqlFluffRawFileSlice(pub PyRawFileSlice);
impl<'py> FromPyObject<'py> for PySqlFluffRawFileSlice {
fn extract_bound(obj: &pyo3::Bound<'py, pyo3::PyAny>) -> PyResult<Self> {
let raw = obj.getattr("raw")?.extract::<String>()?;
let slice_type = obj.getattr("slice_type")?.extract::<String>()?;
let source_idx = obj.getattr("source_idx")?.extract::<usize>().ok();
let block_idx = obj.getattr("block_idx")?.extract::<usize>().ok();
let tag = obj.getattr("tag")?.extract::<Option<String>>()?;
Ok(Self(PyRawFileSlice(RawFileSlice::new(
raw.clone(),
slice_type,
source_idx.unwrap_or(raw.len()),
block_idx,
tag,
))))
}
}
impl From<PySqlFluffRawFileSlice> for PyRawFileSlice {
fn from(value: PySqlFluffRawFileSlice) -> Self {
value.0
}
}
impl From<PySqlFluffRawFileSlice> for RawFileSlice {
fn from(value: PySqlFluffRawFileSlice) -> Self {
value.0 .0
}
}
}
}
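
A worked example of the slice arithmetic above (the template text is illustrative): end_source_idx counts characters forward from source_idx, source_slice wraps that range, and "block_start" is one of the slice types treated as source-only.

use sqlfluffrs::slice::Slice;
use sqlfluffrs::templater::fileslice::RawFileSlice;

fn demo_raw_file_slice() {
    let rfs = RawFileSlice::new(
        "{% if flag %}".to_string(), // 13 characters
        "block_start".to_string(),
        10,
        Some(0),
        Some("if".to_string()),
    );
    assert_eq!(rfs.end_source_idx(), 23);
    assert_eq!(rfs.source_slice(), Slice::from(10..23));
    assert!(rfs.is_source_only_slice());
}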

View File

@@ -0,0 +1,2 @@
pub mod fileslice;
pub mod templatefile;

File diff suppressed because it is too large

View File

@@ -0,0 +1,296 @@
// Wrapper functions that maintain the old TokenGenerator signature for backward compatibility
// These are used by the generated dialect matcher code
use super::{config::TokenConfig, Token};
use crate::{marker::PositionMarker, regex::RegexModeGroup};
use hashbrown::HashSet;
impl Token {
// Wrapper functions that convert from the old 9-parameter signature to TokenConfig
pub fn whitespace_token_compat(
raw: String,
pos_marker: PositionMarker,
class_types: HashSet<String>,
instance_types: Vec<String>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacement: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
) -> Self {
Self::whitespace_token(
raw,
pos_marker,
TokenConfig {
class_types,
instance_types,
trim_start,
trim_chars,
quoted_value,
escape_replacement,
casefold,
},
)
}
pub fn newline_token_compat(
raw: String,
pos_marker: PositionMarker,
class_types: HashSet<String>,
instance_types: Vec<String>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacement: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
) -> Self {
Self::newline_token(
raw,
pos_marker,
TokenConfig {
class_types,
instance_types,
trim_start,
trim_chars,
quoted_value,
escape_replacement,
casefold,
},
)
}
pub fn comment_token_compat(
raw: String,
pos_marker: PositionMarker,
class_types: HashSet<String>,
instance_types: Vec<String>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacement: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
) -> Self {
Self::comment_token(
raw,
pos_marker,
TokenConfig {
class_types,
instance_types,
trim_start,
trim_chars,
quoted_value,
escape_replacement,
casefold,
},
)
}
pub fn code_token_compat(
raw: String,
pos_marker: PositionMarker,
class_types: HashSet<String>,
instance_types: Vec<String>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacement: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
) -> Self {
Self::code_token(
raw,
pos_marker,
TokenConfig {
class_types,
instance_types,
trim_start,
trim_chars,
quoted_value,
escape_replacement,
casefold,
},
)
}
pub fn symbol_token_compat(
raw: String,
pos_marker: PositionMarker,
class_types: HashSet<String>,
instance_types: Vec<String>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacement: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
) -> Self {
Self::symbol_token(
raw,
pos_marker,
TokenConfig {
class_types,
instance_types,
trim_start,
trim_chars,
quoted_value,
escape_replacement,
casefold,
},
)
}
pub fn identifier_token_compat(
raw: String,
pos_marker: PositionMarker,
class_types: HashSet<String>,
instance_types: Vec<String>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacement: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
) -> Self {
Self::identifier_token(
raw,
pos_marker,
TokenConfig {
class_types,
instance_types,
trim_start,
trim_chars,
quoted_value,
escape_replacement,
casefold,
},
)
}
pub fn literal_token_compat(
raw: String,
pos_marker: PositionMarker,
class_types: HashSet<String>,
instance_types: Vec<String>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacement: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
) -> Self {
Self::literal_token(
raw,
pos_marker,
TokenConfig {
class_types,
instance_types,
trim_start,
trim_chars,
quoted_value,
escape_replacement,
casefold,
},
)
}
pub fn binary_operator_token_compat(
raw: String,
pos_marker: PositionMarker,
class_types: HashSet<String>,
instance_types: Vec<String>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacement: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
) -> Self {
Self::binary_operator_token(
raw,
pos_marker,
TokenConfig {
class_types,
instance_types,
trim_start,
trim_chars,
quoted_value,
escape_replacement,
casefold,
},
)
}
pub fn comparison_operator_token_compat(
raw: String,
pos_marker: PositionMarker,
class_types: HashSet<String>,
instance_types: Vec<String>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacement: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
) -> Self {
Self::comparison_operator_token(
raw,
pos_marker,
TokenConfig {
class_types,
instance_types,
trim_start,
trim_chars,
quoted_value,
escape_replacement,
casefold,
},
)
}
pub fn word_token_compat(
raw: String,
pos_marker: PositionMarker,
class_types: HashSet<String>,
instance_types: Vec<String>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacement: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
) -> Self {
Self::word_token(
raw,
pos_marker,
TokenConfig {
class_types,
instance_types,
trim_start,
trim_chars,
quoted_value,
escape_replacement,
casefold,
},
)
}
pub fn unlexable_token_compat(
raw: String,
pos_marker: PositionMarker,
class_types: HashSet<String>,
instance_types: Vec<String>,
trim_start: Option<Vec<String>>,
trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacement: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
) -> Self {
Self::unlexable_token(
raw,
pos_marker,
TokenConfig {
class_types,
instance_types,
trim_start,
trim_chars,
quoted_value,
escape_replacement,
casefold,
},
)
}
}
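
These wrappers exist so that generated code and tests can hand a plain fn pointer to LexMatcher instead of writing a closure, as the test in matcher.rs does. A sketch of the equivalent string matcher built with a compat wrapper (the Ansi dialect and the matcher name are illustrative; the eight trailing None arguments mirror the generated calls):

use sqlfluffrs::dialect::Dialect;
use sqlfluffrs::matcher::LexMatcher;
use sqlfluffrs::token::Token;

// Equivalent to the closure form used in the generated dialect files.
fn comma_matcher() -> LexMatcher {
    LexMatcher::string_lexer(
        Dialect::Ansi,
        "comma",
        ",",
        Token::code_token_compat,
        None, None, None, None, None, None, None, None,
    )
}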

View File

@@ -0,0 +1,93 @@
use crate::regex::RegexModeGroup;
use hashbrown::HashSet;
/// Configuration for token construction, grouping optional parameters
#[derive(Debug, Clone, Default)]
pub struct TokenConfig {
pub class_types: HashSet<String>,
pub instance_types: Vec<String>,
pub trim_start: Option<Vec<String>>,
pub trim_chars: Option<Vec<String>>,
pub quoted_value: Option<(String, RegexModeGroup)>,
pub escape_replacement: Option<(String, String)>,
pub casefold: Option<fn(&str) -> String>,
}
impl TokenConfig {
/// Create a new TokenConfig with default values (all empty/None)
pub fn new() -> Self {
Self::default()
}
/// Create TokenConfig with only instance_types set
pub fn with_instance_types(instance_types: Vec<String>) -> Self {
Self {
instance_types,
..Default::default()
}
}
/// Create TokenConfig with class_types and instance_types
pub fn with_types(class_types: HashSet<String>, instance_types: Vec<String>) -> Self {
Self {
class_types,
instance_types,
..Default::default()
}
}
/// Builder method to add trim_start
pub fn trim_start(mut self, chars: Vec<String>) -> Self {
self.trim_start = Some(chars);
self
}
/// Builder method to add trim_chars
pub fn trim_chars(mut self, chars: Vec<String>) -> Self {
self.trim_chars = Some(chars);
self
}
/// Builder method to add quoted_value
pub fn quoted_value(mut self, value: String, mode: RegexModeGroup) -> Self {
self.quoted_value = Some((value, mode));
self
}
/// Builder method to add escape_replacement
pub fn escape_replacement(mut self, pattern: String, replacement: String) -> Self {
self.escape_replacement = Some((pattern, replacement));
self
}
/// Builder method to add casefold function
pub fn casefold(mut self, func: fn(&str) -> String) -> Self {
self.casefold = Some(func);
self
}
}
/// Helper to extract individual fields for backward compatibility
impl TokenConfig {
pub fn into_parts(
self,
) -> (
HashSet<String>,
Vec<String>,
Option<Vec<String>>,
Option<Vec<String>>,
Option<(String, RegexModeGroup)>,
Option<(String, String)>,
Option<fn(&str) -> String>,
) {
(
self.class_types,
self.instance_types,
self.trim_start,
self.trim_chars,
self.quoted_value,
self.escape_replacement,
self.casefold,
)
}
}
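Illustrative only, not part of the commit: a rough use of the builder above to describe a single-quoted literal. `RegexModeGroup::Index` is assumed to take a capture-group index, as suggested by the Python bindings later in this commit; the module and test names are hypothetical.
#[cfg(test)]
mod config_sketch {
    use crate::regex::RegexModeGroup;
    use crate::token::config::TokenConfig;

    #[test]
    fn builds_a_quoted_literal_config() {
        // Strip surrounding single quotes, capture the inner value with the
        // first regex group, and collapse doubled quotes on normalisation.
        let config = TokenConfig::with_instance_types(vec!["quoted_literal".to_string()])
            .trim_chars(vec!["'".to_string()])
            .quoted_value(r"'(.*)'".to_string(), RegexModeGroup::Index(1))
            .escape_replacement(r"''".to_string(), "'".to_string());
        assert!(config.quoted_value.is_some());
        assert!(config.escape_replacement.is_some());
    }
}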

View File

@@ -0,0 +1,442 @@
use super::{config::TokenConfig, Token};
use crate::{marker::PositionMarker, slice::Slice, templater::templatefile::TemplatedFile};
use std::sync::Arc;
use hashbrown::HashSet;
use uuid::Uuid;
impl Token {
pub fn base_token(
raw: String,
pos_marker: PositionMarker,
config: TokenConfig,
segments: Vec<Token>,
) -> Self {
let TokenConfig {
class_types,
instance_types,
trim_start,
trim_chars,
quoted_value,
escape_replacement,
casefold,
} = config;
let (token_types, class_types) = iter_base_types("base", class_types.clone());
let raw_value = Token::normalize(&raw, quoted_value.clone(), escape_replacement.clone());
Self {
token_type: token_types,
instance_types,
class_types,
comment_separate: false,
is_meta: false,
allow_empty: false,
pos_marker: Some(pos_marker),
raw,
is_whitespace: false,
is_code: true,
is_comment: false,
_default_raw: "".to_string(),
indent_value: 0,
is_templated: false,
block_uuid: None,
source_str: None,
block_type: None,
parent: None,
parent_idx: None,
segments,
preface_modifier: "".to_string(),
suffix: "".to_string(),
uuid: Uuid::new_v4().as_u128(),
source_fixes: None,
trim_start,
trim_chars,
quoted_value,
escape_replacement,
casefold,
raw_value,
}
}
pub fn raw_token(
raw: String,
pos_marker: PositionMarker,
config: TokenConfig,
) -> Self {
let (token_type, class_types) = iter_base_types("raw", config.class_types.clone());
let suffix = format!("'{}'", raw.escape_debug().to_string().trim_matches('"'));
let mut token = Token::base_token(
raw,
pos_marker,
TokenConfig {
class_types,
..config
},
vec![],
);
token.suffix = suffix;
token.token_type = token_type;
token
}
pub fn code_token(
raw: String,
pos_marker: PositionMarker,
config: TokenConfig,
) -> Self {
Self::raw_token(raw, pos_marker, config)
}
pub fn symbol_token(
raw: String,
pos_marker: PositionMarker,
config: TokenConfig,
) -> Self {
let (token_type, class_types) = iter_base_types("symbol", config.class_types.clone());
let mut token = Self::code_token(
raw,
pos_marker,
TokenConfig {
class_types,
..config
},
);
token.token_type = token_type;
token
}
pub fn identifier_token(
raw: String,
pos_marker: PositionMarker,
config: TokenConfig,
) -> Self {
let (token_type, class_types) = iter_base_types("identifier", config.class_types.clone());
let mut token = Self::code_token(
raw,
pos_marker,
TokenConfig {
class_types,
..config
},
);
token.token_type = token_type;
token
}
pub fn literal_token(
raw: String,
pos_marker: PositionMarker,
config: TokenConfig,
) -> Self {
let (token_type, class_types) = iter_base_types("literal", config.class_types.clone());
let mut token = Self::code_token(
raw,
pos_marker,
TokenConfig {
class_types,
..config
},
);
token.token_type = token_type;
token
}
pub fn binary_operator_token(
raw: String,
pos_marker: PositionMarker,
config: TokenConfig,
) -> Self {
let (token_type, class_types) = iter_base_types("binary_operator", config.class_types.clone());
let mut token = Self::code_token(
raw,
pos_marker,
TokenConfig {
class_types,
..config
},
);
token.token_type = token_type;
token
}
pub fn comparison_operator_token(
raw: String,
pos_marker: PositionMarker,
config: TokenConfig,
) -> Self {
let (token_type, class_types) = iter_base_types("comparison_operator", config.class_types.clone());
let mut token = Self::code_token(
raw,
pos_marker,
TokenConfig {
class_types,
..config
},
);
token.token_type = token_type;
token
}
pub fn word_token(
raw: String,
pos_marker: PositionMarker,
config: TokenConfig,
) -> Self {
let (token_type, class_types) = iter_base_types("word", config.class_types.clone());
let mut token = Self::code_token(
raw,
pos_marker,
TokenConfig {
class_types,
..config
},
);
token.token_type = token_type;
token
}
pub fn unlexable_token(
raw: String,
pos_marker: PositionMarker,
config: TokenConfig,
) -> Self {
let (token_type, class_types) = iter_base_types("unlexable", config.class_types.clone());
let mut token = Self::code_token(
raw,
pos_marker,
TokenConfig {
class_types,
..config
},
);
token.token_type = token_type;
token
}
pub fn whitespace_token(
raw: String,
pos_marker: PositionMarker,
config: TokenConfig,
) -> Self {
let (token_type, class_types) = iter_base_types("whitespace", config.class_types.clone());
let mut token = Self::raw_token(
raw,
pos_marker,
TokenConfig {
class_types,
..config
},
);
token.token_type = token_type;
token.is_whitespace = true;
token.is_code = false;
token.is_comment = false;
token._default_raw = " ".to_string();
token
}
pub fn newline_token(
raw: String,
pos_marker: PositionMarker,
config: TokenConfig,
) -> Self {
let (token_type, class_types) = iter_base_types("newline", config.class_types.clone());
let mut token = Self::raw_token(
raw,
pos_marker,
TokenConfig {
class_types,
..config
},
);
token.token_type = token_type;
token.is_whitespace = true;
token.is_code = false;
token.is_comment = false;
token._default_raw = "\n".to_string();
token
}
pub fn comment_token(
raw: String,
pos_marker: PositionMarker,
config: TokenConfig,
) -> Self {
let (token_type, class_types) = iter_base_types("comment", config.class_types.clone());
let mut token = Self::raw_token(
raw,
pos_marker,
TokenConfig {
class_types,
..config
},
);
token.token_type = token_type;
token.is_code = false;
token.is_comment = true;
token
}
pub fn meta_token(
pos_marker: PositionMarker,
is_templated: bool,
block_uuid: Option<Uuid>,
class_types: HashSet<String>,
) -> Self {
let (token_type, class_types) = iter_base_types("meta", class_types.clone());
let mut token = Self::raw_token(
"".to_string(),
pos_marker,
TokenConfig {
class_types,
instance_types: vec![],
..TokenConfig::default()
},
);
token.token_type = token_type;
token.is_code = false;
token.is_meta = true;
token.is_templated = is_templated;
token.block_uuid = block_uuid;
token.preface_modifier = "[META] ".to_string();
token.suffix = String::new();
token
}
pub fn end_of_file_token(
pos_marker: PositionMarker,
is_templated: bool,
block_uuid: Option<Uuid>,
class_types: HashSet<String>,
) -> Self {
let (token_type, class_types) = iter_base_types("end_of_file", class_types);
Self {
token_type,
..Self::meta_token(
pos_marker,
is_templated,
block_uuid,
class_types,
)
}
}
pub fn indent_token(
pos_marker: PositionMarker,
is_templated: bool,
block_uuid: Option<Uuid>,
class_types: HashSet<String>,
) -> Self {
let (token_type, class_types) = iter_base_types("indent", class_types);
Self {
token_type,
indent_value: 1,
suffix: block_uuid
.map(|u| u.as_hyphenated().to_string())
.unwrap_or_default(),
..Self::meta_token(
pos_marker,
is_templated,
block_uuid,
class_types,
)
}
}
pub fn dedent_token(
pos_marker: PositionMarker,
is_templated: bool,
block_uuid: Option<Uuid>,
class_types: HashSet<String>,
) -> Self {
let (token_type, class_types) = iter_base_types("dedent", class_types);
Self {
token_type,
indent_value: -1,
..Self::indent_token(
pos_marker,
is_templated,
block_uuid,
class_types,
)
}
}
pub fn template_loop_token(
pos_marker: PositionMarker,
block_uuid: Option<Uuid>,
class_types: HashSet<String>,
) -> Self {
let (token_type, class_types) = iter_base_types("template_loop", class_types);
Self {
token_type,
..Self::meta_token(
pos_marker,
false,
block_uuid,
class_types,
)
}
}
pub fn template_placeholder_token(
pos_marker: PositionMarker,
source_string: String,
block_type: String,
block_uuid: Option<Uuid>,
class_types: HashSet<String>,
) -> Self {
let (token_type, class_types) = iter_base_types("placeholder", class_types);
Self {
token_type,
block_type: Some(block_type),
source_str: Some(source_string),
..Self::meta_token(
pos_marker,
false,
block_uuid,
class_types,
)
}
}
pub fn template_placeholder_token_from_slice(
source_slice: Slice,
templated_slice: Slice,
block_type: String,
templated_file: &Arc<TemplatedFile>,
block_uuid: Option<Uuid>,
class_types: HashSet<String>,
) -> Self {
let pos_marker = PositionMarker::new(
source_slice,
templated_slice,
templated_file,
None,
None,
);
Self {
..Self::template_placeholder_token(
pos_marker,
templated_file
.source_str
.chars()
.skip(source_slice.start)
.take(source_slice.len())
.collect::<String>(),
block_type,
block_uuid,
class_types,
)
}
}
}
fn iter_base_types(token_type: &str, class_types: HashSet<String>) -> (String, HashSet<String>) {
let mut class_types = class_types;
let token_type = token_type.to_string();
class_types.insert(token_type.clone());
(token_type, class_types)
}
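Illustrative only, not part of the commit: a sketch of how the constructors above compose. Each specialised constructor registers its own type via iter_base_types before delegating, so a whitespace token ends up carrying the "whitespace", "raw" and "base" class types. Helper signatures (`TemplatedFile::from`, `PositionMarker::new`, `Slice`) are assumed from the test module in mod.rs; the module name is hypothetical.
#[cfg(test)]
mod construction_sketch {
    use std::sync::Arc;
    use crate::marker::PositionMarker;
    use crate::slice::Slice;
    use crate::templater::templatefile::TemplatedFile;
    use crate::token::{config::TokenConfig, Token};

    #[test]
    fn whitespace_token_accumulates_class_types() {
        let tf = Arc::new(TemplatedFile::from("  ".to_string()));
        let tok = Token::whitespace_token(
            "  ".to_string(),
            PositionMarker::new(
                Slice { start: 0, stop: 2 },
                Slice { start: 0, stop: 2 },
                &tf,
                None,
                None,
            ),
            TokenConfig::default(),
        );
        assert_eq!(tok.get_type(), "whitespace");
        assert!(tok.is_whitespace());
        // Each constructor in the chain registers its own type before
        // delegating: whitespace -> raw -> base.
        for t in ["whitespace", "raw", "base"] {
            assert!(tok.class_types().contains(t));
        }
    }
}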

View File

@@ -0,0 +1,24 @@
use std::hash::Hash;
use super::Token;
impl PartialEq for Token {
fn eq(&self, other: &Self) -> bool {
self.uuid == other.uuid
|| (self.token_type == other.token_type
&& self.raw == other.raw
&& self.pos_marker.is_some()
&& other.pos_marker.is_some()
&& self.pos_marker == other.pos_marker)
}
}
impl Hash for Token {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.token_type.hash(state);
self.raw.hash(state);
if let Some(p) = self.pos_marker.as_ref() {
p.working_loc().hash(state);
}
}
}
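Illustrative only, not part of the commit: a small check of the contract above. Equality and hashing ignore the per-construction uuid, so two separately built but otherwise identical tokens compare equal and hash the same. Constructor helpers are assumed as in the mod.rs tests; the module and function names are hypothetical.
#[cfg(test)]
mod eq_sketch {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};
    use std::sync::Arc;
    use crate::marker::PositionMarker;
    use crate::slice::Slice;
    use crate::templater::templatefile::TemplatedFile;
    use crate::token::{config::TokenConfig, Token};

    fn hash_of(token: &Token) -> u64 {
        let mut hasher = DefaultHasher::new();
        token.hash(&mut hasher);
        hasher.finish()
    }

    #[test]
    fn uuid_is_ignored_by_eq_and_hash() {
        let tf = Arc::new(TemplatedFile::from("foo".to_string()));
        let make = || {
            Token::word_token(
                "foo".to_string(),
                PositionMarker::new(
                    Slice { start: 0, stop: 3 },
                    Slice { start: 0, stop: 3 },
                    &tf,
                    None,
                    None,
                ),
                TokenConfig::default(),
            )
        };
        let (a, b) = (make(), make());
        assert_ne!(a.uuid, b.uuid); // every construction draws a fresh uuid
        assert_eq!(a, b); // equal via the (type, raw, pos_marker) branch
        assert_eq!(hash_of(&a), hash_of(&b)); // hash uses type, raw and working location
    }
}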

View File

@@ -0,0 +1,21 @@
use crate::slice::Slice;
#[derive(Debug, Clone)]
pub struct SourceFix {
edit: String,
source_slice: Slice,
templated_slice: Slice,
}
impl PartialEq for SourceFix {
fn eq(&self, other: &Self) -> bool {
self.edit == other.edit && self.source_slice == other.source_slice
}
}
impl std::hash::Hash for SourceFix {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.edit.hash(state);
self.source_slice.hash(state);
}
}

View File

@@ -0,0 +1,14 @@
use super::Token;
use std::fmt::Display;
impl Display for Token {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"<{}: ({}) '{}'>",
self.token_type.clone(),
self.pos_marker.clone().expect("PositionMarker unset"),
self.raw.escape_debug(),
)
}
}

830
sqlfluffrs/src/token/mod.rs Normal file
View File

@@ -0,0 +1,830 @@
pub mod compat;
pub mod config;
pub mod construction;
mod eq;
pub mod fix;
mod fmt;
pub mod path;
#[cfg(feature = "python")]
pub mod python;
use std::{
fmt::Write,
sync::{Arc, Weak},
};
use fix::SourceFix;
use hashbrown::HashSet;
use path::PathStep;
use uuid::Uuid;
use crate::{
marker::PositionMarker,
regex::{RegexMode, RegexModeGroup},
};
#[derive(Debug, Clone, PartialEq)]
pub enum TupleSerialisedSegment {
Str(String, String),
Nested(String, Vec<TupleSerialisedSegment>),
}
#[derive(Debug, Clone)]
pub struct Token {
pub token_type: String,
pub instance_types: Vec<String>,
pub class_types: HashSet<String>,
pub comment_separate: bool,
pub is_meta: bool,
pub allow_empty: bool,
pub pos_marker: Option<PositionMarker>,
pub raw: String,
is_whitespace: bool,
is_code: bool,
is_comment: bool,
_default_raw: String,
pub indent_value: i32,
pub is_templated: bool,
pub block_uuid: Option<Uuid>,
pub source_str: Option<String>,
pub block_type: Option<String>,
parent: Option<Weak<Token>>,
parent_idx: Option<usize>,
pub segments: Vec<Token>,
preface_modifier: String,
suffix: String,
pub uuid: u128,
pub source_fixes: Option<Vec<SourceFix>>,
pub trim_start: Option<Vec<String>>,
pub trim_chars: Option<Vec<String>>,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacement: Option<(String, String)>,
casefold: Option<fn(&str) -> String>,
raw_value: String,
}
impl Token {
fn comments(&self) -> Vec<Token> {
self.segments
.clone()
.into_iter()
.filter(|s| s.is_type(&["comment"]))
.collect::<Vec<_>>()
}
fn non_comments(&self) -> Vec<Token> {
self.segments
.clone()
.into_iter()
.filter(|s| !s.is_type(&["comment"]))
.collect::<Vec<_>>()
}
/// Returns True if this segment is code.
pub fn is_code(&self) -> bool {
match self.is_raw() {
true => self.is_code,
false => self.segments.iter().any(|s| s.is_code()),
}
}
fn code_indices(&self) -> Vec<usize> {
self.segments
.iter()
.enumerate()
.filter(|(_i, s)| s.is_code())
.map(|(i, _s)| i)
.collect()
}
pub fn is_comment(&self) -> bool {
match self.is_raw() {
true => self.is_comment,
false => self.segments.iter().all(|s| s.is_comment()),
}
}
pub fn is_whitespace(&self) -> bool {
match self.is_raw() {
true => self.is_whitespace,
false => self.segments.iter().all(|s| s.is_whitespace()),
}
}
pub fn raw(&self) -> String {
self.raw.clone()
}
pub fn raw_upper(&self) -> String {
self.raw.to_uppercase()
}
pub fn normalize(
value: &str,
quoted_value: Option<(String, RegexModeGroup)>,
escape_replacement: Option<(String, String)>,
) -> String {
let mut str_buffer = value.to_string();
if let Some((ref regex_str, idx)) = quoted_value {
if let Some(captured) = RegexMode::new(regex_str).capture(idx, value) {
str_buffer = captured
}
}
if let Some((ref regex_str, ref replacement)) = escape_replacement {
str_buffer = RegexMode::new(regex_str).replace_all(&str_buffer, replacement.as_str());
}
str_buffer
}
pub fn raw_segments(&self) -> Vec<Token> {
match self.is_raw() {
true => vec![self.clone()],
false => self
.segments
.iter()
.flat_map(|s| s.raw_segments())
.collect::<Vec<_>>(),
}
}
/// The set of full types for this token, including inherited.
/// Adds the surrogate type for raw segments.
pub fn class_types(&self) -> HashSet<String> {
let mut full_types = self.instance_types.iter().cloned().collect::<HashSet<_>>();
full_types.extend(self.class_types.clone());
full_types
}
pub fn descendant_type_set(&self) -> HashSet<String> {
self.segments
.iter()
.flat_map(|seg| {
seg.descendant_type_set()
.union(&seg.class_types())
.cloned()
.collect::<HashSet<String>>()
})
.collect::<HashSet<String>>()
}
pub fn direct_descendant_type_set(&self) -> HashSet<String> {
self.segments
.iter()
.flat_map(|seg| seg.class_types())
.collect::<HashSet<String>>()
}
pub fn raw_segments_with_ancestors(&self) -> Vec<(Token, Vec<PathStep>)> {
todo!()
}
pub fn source_fixes(&self) -> Vec<SourceFix> {
match self.is_raw() {
true => self.source_fixes.clone().unwrap_or_default(),
false => self
.segments
.iter()
.flat_map(|s| s.source_fixes())
.collect(),
}
}
pub fn first_non_whitespace_segment_raw_upper(&self) -> Option<String> {
self.raw_segments().iter().find_map(|seg| {
if !seg.raw_upper().trim().is_empty() {
Some(seg.raw_upper().clone())
} else {
None
}
})
}
pub fn is_templated(&self) -> bool {
let pos_marker = self.pos_marker.clone().expect("PositionMarker must be set");
pos_marker.source_slice.start != pos_marker.source_slice.stop && !pos_marker.is_literal()
}
pub fn get_type(&self) -> String {
self.token_type.clone()
}
pub fn is_type(&self, seg_types: &[&str]) -> bool {
if self
.instance_types
.iter()
.any(|s| seg_types.contains(&s.as_str()))
{
return true;
}
self.class_is_type(seg_types)
}
pub fn get_raw_segments(&self) -> Vec<Token> {
todo!()
}
pub fn raw_trimmed(&self) -> String {
let mut raw_buff = self.raw.clone();
// Trim start sequences
if let Some(trim_start) = &self.trim_start {
for seq in trim_start {
raw_buff = raw_buff.strip_prefix(seq).unwrap_or(&raw_buff).to_string();
}
}
// Trim specified characters from both ends
if let Some(trim_chars) = &self.trim_chars {
raw_buff = self.raw.clone(); // Reset raw_buff before trimming chars
for seq in trim_chars {
while raw_buff.starts_with(seq) {
raw_buff = raw_buff.strip_prefix(seq).unwrap_or(&raw_buff).to_string();
}
while raw_buff.ends_with(seq) {
raw_buff = raw_buff.strip_suffix(seq).unwrap_or(&raw_buff).to_string();
}
}
}
raw_buff
}
fn _raw_normalized(&self) -> String {
todo!()
}
pub fn raw_normalized(&self) -> String {
todo!()
}
pub fn stringify(&self, ident: usize, tabsize: usize, code_only: bool) -> String {
let mut buff = String::new();
let preface = self.preface(ident, tabsize);
writeln!(buff, "{}", preface).unwrap();
if !code_only && self.comment_separate && !self.comments().is_empty() {
if !self.comments().is_empty() {
writeln!(buff, "{}Comments:", " ".repeat((ident + 1) * tabsize)).unwrap();
for seg in &self.comments() {
let segment_string = seg.stringify(ident + 2, tabsize, code_only);
buff.push_str(&segment_string);
}
}
if !self.non_comments().is_empty() {
writeln!(buff, "{}Code:", " ".repeat((ident + 1) * tabsize)).unwrap();
for seg in &self.non_comments() {
let segment_string = seg.stringify(ident + 2, tabsize, code_only);
buff.push_str(&segment_string);
}
}
} else {
for seg in &self.segments {
if !code_only || seg.is_code {
let segment_string = seg.stringify(ident + 1, tabsize, code_only);
buff.push_str(&segment_string);
}
}
}
buff
}
pub fn edit(&self, raw: Option<String>, source_fixes: Option<Vec<SourceFix>>) -> Self {
Self {
raw: raw.unwrap_or(self.raw.clone()),
source_fixes: Some(source_fixes.unwrap_or(self.source_fixes())),
uuid: Uuid::new_v4().as_u128(),
..self.clone()
}
}
// pub fn _get_raw_segment_kwargs(&self) -> HashMap<String, _> {
// let kwargs = HashMap::new();
// kwargs.insert("quoted_value", self.quoted_value);
// kwargs.insert("escape_replacements", vec![self.escape_replacement]);
// kwargs
// }
pub fn iter_unparseables(&self) -> Vec<Token> {
self.segments
.iter()
.flat_map(|s| s.iter_unparseables())
.collect()
}
pub fn set_parent(&mut self, parent: Arc<Token>, idx: usize) {
self.parent = Some(Arc::downgrade(&parent));
self.parent_idx = Some(idx);
}
pub fn class_is_type(&self, seg_types: &[&str]) -> bool {
let seg_hash: HashSet<&str> = seg_types.iter().cloned().collect();
!self
.class_types
.iter()
.filter(|s| seg_hash.contains(s.as_str()))
.collect::<Vec<_>>()
.is_empty()
}
pub fn count_segments(&self, raw_only: bool) -> usize {
if self.is_raw() {
1
} else {
let self_count = if raw_only { 0 } else { 1 };
self.segments
.iter()
.fold(0, |acc, s| acc + s.count_segments(raw_only) + self_count)
}
}
pub fn is_raw(&self) -> bool {
self.segments.is_empty()
}
pub fn block_type(&self) -> Option<String> {
self.block_type.clone()
}
pub fn recursive_crawl(
&self,
seg_types: &[&str],
recurse_into: bool,
no_recursive_seg_type: Option<&[&str]>,
allow_self: bool,
) -> Vec<Token> {
let mut results = Vec::new();
// If recurse_into is False and this matches, don't recurse
if !recurse_into && self.is_type(seg_types) {
if allow_self {
results.push(self.clone());
}
return results;
}
// Check if self matches the given segment types
if allow_self && self.is_type(seg_types) {
results.push(self.clone());
}
// Convert no_recursive_seg_type to HashSet for efficient lookups
let no_recursive_set: HashSet<&str> = no_recursive_seg_type
.unwrap_or(&[])
.iter()
.cloned()
.collect();
// Recursively process child segments
for seg in &self.segments {
if no_recursive_set.contains(seg.token_type.as_str()) {
continue;
}
results.extend(seg.recursive_crawl(
seg_types,
recurse_into,
no_recursive_seg_type,
true,
));
}
results
}
pub fn path_to(self, other: Self) -> Vec<PathStep> {
// Return empty if they are the same segment.
if self == other {
return vec![];
}
// If there are no child segments, return empty.
if self.segments.is_empty() {
return vec![];
}
// Identifying the highest parent we can using any preset parent values.
let mut midpoint = other.clone();
let mut lower_path = Vec::new();
while let Some(weak_parent) = &midpoint.parent.clone().as_ref() {
if let Some(parent) = weak_parent.upgrade() {
let parent_idx = midpoint.parent_idx.expect("Parent index must be set.");
lower_path.push(PathStep {
segment: Arc::clone(&parent),
idx: parent_idx,
len: parent.segments.len(),
code_idxs: parent.code_indices().clone(),
});
midpoint = Arc::unwrap_or_clone(parent);
if midpoint == self {
break;
}
} else {
break;
}
}
// Reverse the path so far
lower_path.reverse();
// If we have already found the parent, return.
if midpoint == self {
return lower_path;
}
// If we've gone all the way up to the file segment, return empty.
if midpoint.class_is_type(&["file"]) {
return vec![];
}
// Check if midpoint is within self's range.
if !(self.get_start_loc() <= midpoint.get_start_loc()
&& midpoint.get_start_loc() <= self.get_end_loc())
{
return vec![];
}
// Now, work downward from `self` toward `midpoint`.
for (idx, seg) in self.segments.clone().iter().enumerate() {
// Set the parent if it's not already set.
let mut seg = seg.clone();
seg.set_parent(Arc::new(self.clone()), idx);
let step = PathStep {
segment: Arc::new(self.clone()),
idx,
len: self.segments.clone().len(),
code_idxs: self.code_indices().clone(),
};
// If we found the target
if seg == midpoint {
let mut result = vec![step];
result.extend(lower_path);
return result;
}
// Check recursively if a path exists
let res = seg.path_to(midpoint.clone());
if !res.is_empty() {
let mut result = vec![step];
result.extend(res);
result.extend(lower_path);
return result;
}
}
// Not found.
vec![]
}
pub fn get_start_loc(&self) -> (usize, usize) {
self.pos_marker
.clone()
.expect("PositionMarker unset")
.working_loc()
}
pub fn get_end_loc(&self) -> (usize, usize) {
self.pos_marker
.clone()
.expect("PositionMarker unset")
.working_loc_after(&self.raw)
}
pub fn recursive_crawl_all(&self, reverse: bool) -> Box<dyn Iterator<Item = &Token> + '_> {
if reverse {
Box::new(
self.segments
.iter()
.rev()
.flat_map(move |seg| seg.recursive_crawl_all(reverse))
.chain(std::iter::once(self)),
)
} else {
Box::new(
std::iter::once(self).chain(
self.segments
.iter()
.flat_map(move |seg| seg.recursive_crawl_all(reverse)),
),
)
}
}
fn preface(&self, ident: usize, tabsize: usize) -> String {
let padding = " ".repeat(ident * tabsize);
let padded_type = format!("{}{}{}:", padding, self.preface_modifier, self.get_type());
let pos = self.pos_marker.clone();
let suffix = self.suffix.clone();
let preface = format!(
"{:<20}|{:<60} {}",
pos.clone()
.expect("PositionMarker unset")
.to_source_string(),
padded_type,
suffix
);
preface.trim_end().to_string()
}
pub fn to_tuple(
&self,
code_only: Option<bool>,
show_raw: Option<bool>,
include_meta: Option<bool>,
) -> TupleSerialisedSegment {
let code_only = code_only.unwrap_or_default();
let show_raw = show_raw.unwrap_or_default();
let include_meta = include_meta.unwrap_or_default();
// If `show_raw` is true and there are no child segments, return (type, raw)
if show_raw && self.segments.is_empty() {
return TupleSerialisedSegment::Str(self.get_type(), self.raw.clone());
}
// Determine filtering criteria for child segments
let filtered_segments: Vec<TupleSerialisedSegment> = self
.segments
.iter()
.filter(|seg| {
if code_only {
seg.is_code && !seg.is_meta
} else {
include_meta || !seg.is_meta
}
})
.map(|seg| seg.to_tuple(Some(code_only), Some(show_raw), Some(include_meta)))
.collect();
TupleSerialisedSegment::Nested(self.get_type(), filtered_segments)
}
pub fn copy(
&self,
segments: Option<Vec<Token>>,
parent: Option<Arc<Token>>,
parent_idx: Option<usize>,
) -> Token {
let mut new_segment = self.clone();
new_segment.parent = parent.as_ref().map(Arc::downgrade);
new_segment.parent_idx = parent_idx;
if let Some(ref segs) = segments {
new_segment.segments = segs.clone();
} else {
new_segment.segments = self
.segments
.iter()
.enumerate()
.map(|(idx, seg)| {
seg.copy(
None,
Some(Arc::new(new_segment.clone())),
Some(idx),
)
})
.collect();
}
new_segment
}
pub fn position_segments(segments: &[Token], parent_pos: PositionMarker) -> Vec<Token> {
assert!(
!segments.is_empty(),
"position_segments called on empty sequence."
);
let mut line_no = parent_pos.working_line_no;
let mut line_pos = parent_pos.working_line_pos;
let mut segment_buffer = Vec::new();
for (idx, segment) in segments.iter().enumerate() {
let old_position = segment.pos_marker.clone();
let mut new_position = segment.pos_marker.clone();
// If position is missing, try to infer it
if new_position.is_none() {
let mut start_point = None;
if idx > 0 {
let prev_seg: &Token = &segment_buffer[idx - 1];
if let Some(ref pos_marker) = prev_seg.pos_marker {
start_point = Some(pos_marker.end_point_marker());
}
} else {
start_point = Some(parent_pos.start_point_marker());
}
// Search forward for the end point
let mut end_point = None;
for fwd_seg in &segments[idx + 1..] {
if let Some(ref pos_marker) = fwd_seg.pos_marker {
end_point = Some(pos_marker.start_point_marker());
break;
}
}
new_position = match (start_point, end_point) {
(Some(start), Some(end)) if start != end => {
Some(PositionMarker::from_points(&start, &end))
}
(Some(start), _) => Some(start),
(_, Some(end)) => Some(end),
_ => panic!("Unable to position new segment"),
};
}
let new_position = new_position.expect("Position should be assigned");
let new_position = new_position.with_working_position(line_no, line_pos);
let (new_line_no, new_line_pos) =
new_position.infer_next_position(&segment.raw, line_no, line_pos);
line_no = new_line_no;
line_pos = new_line_pos;
// If position changed, recursively process child segments before copying
let new_segment =
if !segment.segments.is_empty() && old_position != Some(new_position.clone()) {
let child_segments =
Token::position_segments(&segment.segments, new_position.clone());
segment.copy(Some(child_segments), None, None)
} else {
segment.copy(None, None, None)
};
segment_buffer.push(new_segment);
}
segment_buffer
}
// /// Simplifies the structure of the token recursively for serialization.
// pub fn structural_simplify(&self) -> HashMap<String, Option<serde_json::Value>> {
// let mut result = HashMap::new();
// let key = self.get_type();
// if self.segments.is_empty() {
// // If there are no child segments, return the raw value.
// result.insert(key, Some(serde_json::Value::String(self.raw.clone())));
// } else {
// // Simplify all child segments recursively.
// let mut child_results = Vec::new();
// for segment in &self.segments {
// child_results.push(serde_json::Value::Object(
// segment.structural_simplify(),
// ));
// }
// // Check for duplicate keys in child results.
// let mut subkeys = Vec::new();
// for child in &child_results {
// if let serde_json::Value::Object(map) = child {
// subkeys.extend(map.keys().cloned());
// }
// }
// if subkeys.len() != subkeys.iter().collect::<std::collections::HashSet<_>>().len() {
// // If there are duplicate keys, use a list of child objects.
// result.insert(key, Some(serde_json::Value::Array(child_results)));
// } else {
// // Otherwise, merge child objects into a single map.
// let mut merged_map = HashMap::new();
// for child in child_results {
// if let serde_json::Value::Object(map) = child {
// for (k, v) in map {
// merged_map.insert(k, v);
// }
// }
// }
// result.insert(key, Some(serde_json::Value::Object(merged_map)));
// }
// }
// result
// }
}
#[cfg(test)]
mod tests {
use crate::matcher::TokenGenerator;
use crate::slice::Slice;
use crate::templater::templatefile::TemplatedFile;
use super::*;
/// Roughly generate test segments.
///
/// This function isn't totally robust, but good enough
/// for testing. Use with caution.
fn generate_test_segments(elems: &[&str]) -> Vec<Token> {
let mut buff = vec![];
let templated_file = Arc::new(TemplatedFile::from(
elems.iter().cloned().collect::<String>(),
));
let mut idx = 0;
for elem in elems {
let elem = &**elem;
if elem == "<indent>" {
buff.push(Token::indent_token(
PositionMarker::from_point(idx, idx, &templated_file, None, None),
false,
None,
HashSet::new(),
));
continue;
} else if elem == "<dedent>" {
buff.push(Token::dedent_token(
PositionMarker::from_point(idx, idx, &templated_file, None, None),
false,
None,
HashSet::new(),
));
continue;
}
let (token_fn, instance_types): (TokenGenerator, Vec<String>) =
match elem {
" " | "\t" => (
Token::whitespace_token_compat,
Vec::new(),
),
"\n" => (Token::newline_token_compat, Vec::new()),
"(" => (
Token::symbol_token_compat,
Vec::from_iter(["start_bracket".to_string()]),
),
")" => (
Token::symbol_token_compat,
Vec::from_iter(["end_bracket".to_string()]),
),
"[" => (
Token::symbol_token_compat,
Vec::from_iter(["start_square_bracket".to_string()]),
),
"]" => (
Token::symbol_token_compat,
Vec::from_iter(["end_square_bracket".to_string()]),
),
s if s.starts_with("--") => (
Token::comment_token_compat,
Vec::from_iter(["inline_comment".to_string()]),
),
s if s.starts_with("\"") => (
Token::code_token_compat,
Vec::from_iter(["double_quote".to_string()]),
),
s if s.starts_with("'") => (
Token::code_token_compat,
Vec::from_iter(["single_quote".to_string()]),
),
_ => (Token::code_token_compat, Vec::new()),
};
buff.push(token_fn(
elem.into(),
PositionMarker::new(
Slice {
start: idx,
stop: idx + elem.len(),
},
Slice {
start: idx,
stop: idx + elem.len(),
},
&templated_file,
None,
None,
),
HashSet::new(),
instance_types,
None,
None,
None,
None,
None,
));
idx += elem.len();
}
buff
}
fn raw_segments() -> Vec<Token> {
generate_test_segments(&["foobar", ".barfoo"])
}
#[test]
/// Test niche case of calling get_raw_segments on a raw segment.
fn test_parser_raw_get_raw_segments() {
for s in raw_segments() {
assert_eq!(s.raw_segments(), [s]);
}
}
}
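Illustrative only, not part of the commit: a sketch of to_tuple on a leaf token, reusing the same helpers assumed by the test module above. With show_raw set, a childless token serialises to a (type, raw) pair; tokens with children would produce the Nested variant consumed by the Python bindings. The module name is hypothetical.
#[cfg(test)]
mod serialisation_sketch {
    use std::sync::Arc;
    use crate::marker::PositionMarker;
    use crate::slice::Slice;
    use crate::templater::templatefile::TemplatedFile;
    use crate::token::{config::TokenConfig, Token, TupleSerialisedSegment};

    #[test]
    fn leaf_tokens_serialise_to_type_and_raw() {
        let tf = Arc::new(TemplatedFile::from("foo".to_string()));
        let tok = Token::code_token(
            "foo".to_string(),
            PositionMarker::new(
                Slice { start: 0, stop: 3 },
                Slice { start: 0, stop: 3 },
                &tf,
                None,
                None,
            ),
            TokenConfig::default(),
        );
        // With show_raw = true a childless token becomes a (type, raw) pair;
        // code_token delegates to raw_token, so its reported type is "raw".
        match tok.to_tuple(None, Some(true), None) {
            TupleSerialisedSegment::Str(t, raw) => {
                assert_eq!(t, "raw");
                assert_eq!(raw, "foo");
            }
            other => panic!("expected a leaf serialisation, got {:?}", other),
        }
    }
}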

View File

@@ -0,0 +1,10 @@
use super::Token;
use std::sync::Arc;
#[derive(Debug, Clone)]
pub struct PathStep {
pub segment: Arc<Token>,
pub idx: usize,
pub len: usize,
pub code_idxs: Vec<usize>,
}

View File

@@ -0,0 +1,567 @@
use std::{
fmt::{Debug, Display},
sync::Arc,
};
use hashbrown::HashSet;
use pyo3::{
prelude::*,
types::{PyDict, PyString, PyTuple, PyType},
};
use uuid::Uuid;
use crate::{
marker::python::{PyPositionMarker, PySqlFluffPositionMarker},
regex::RegexModeGroup,
};
use super::{path::PathStep, SourceFix, Token, TupleSerialisedSegment};
#[pyclass(name = "RsSourceFix")]
#[repr(transparent)]
#[derive(Clone)]
pub struct PySourceFix(pub SourceFix);
impl From<PySourceFix> for SourceFix {
fn from(value: PySourceFix) -> SourceFix {
value.0
}
}
impl From<SourceFix> for PySourceFix {
fn from(value: SourceFix) -> Self {
Self(value)
}
}
#[pyclass(name = "RsPathStep")]
#[repr(transparent)]
#[derive(Clone)]
pub struct PyPathStep(pub PathStep);
impl From<PyPathStep> for PathStep {
fn from(value: PyPathStep) -> Self {
value.0
}
}
impl From<PathStep> for PyPathStep {
fn from(value: PathStep) -> Self {
Self(value)
}
}
#[pyclass(name = "RsTupleSerialisedSegment")]
#[repr(transparent)]
#[derive(Clone)]
pub struct PyTupleSerialisedSegment(pub TupleSerialisedSegment);
impl PyTupleSerialisedSegment {
pub fn to_py_tuple<'py>(&self, py: Python<'py>) -> Result<Bound<'py, PyTuple>, PyErr> {
match &self.0 {
TupleSerialisedSegment::Str(segment_type, raw_value) => {
PyTuple::new(py, [segment_type, raw_value])
}
TupleSerialisedSegment::Nested(segment_type, segments) => {
let py_segment_type = PyString::new(py, segment_type);
let py_segments: Vec<_> = segments
.iter()
.map(|s| {
PyTupleSerialisedSegment::to_py_tuple(
&PyTupleSerialisedSegment(s.clone()),
py,
)
})
.collect::<Result<Vec<_>, _>>()?;
let pt_segments_tuple = PyTuple::new(py, &py_segments)?;
PyTuple::new(
py,
&[py_segment_type.into_any(), pt_segments_tuple.into_any()],
)
}
}
}
}
impl From<PyTupleSerialisedSegment> for TupleSerialisedSegment {
fn from(value: PyTupleSerialisedSegment) -> Self {
value.0
}
}
impl From<TupleSerialisedSegment> for PyTupleSerialisedSegment {
fn from(value: TupleSerialisedSegment) -> Self {
Self(value)
}
}
#[pyclass(name = "RsToken", weakref, module = "sqlfluffrs")]
#[repr(transparent)]
#[derive(Debug, Clone)]
pub struct PyToken(pub Token);
#[pymethods]
impl PyToken {
#[getter]
pub fn raw(&self) -> String {
self.0.raw.to_string()
}
pub fn raw_trimmed(&self) -> String {
self.0.raw_trimmed()
}
#[getter]
pub fn pos_marker(&self) -> Option<PyPositionMarker> {
self.0.pos_marker.clone().map(PyPositionMarker)
}
#[setter]
pub fn set_pos_marker(&mut self, value: Option<PySqlFluffPositionMarker>) {
self.0.pos_marker = value.map(Into::into);
}
pub fn get_type(&self) -> String {
self.0.get_type()
}
#[getter(r#type)]
pub fn type_(&self) -> String {
self.0.get_type()
}
#[getter]
pub fn is_templated(&self) -> bool {
self.0.is_templated()
}
#[getter]
pub fn is_code(&self) -> bool {
self.0.is_code
}
#[getter]
pub fn is_meta(&self) -> bool {
self.0.is_meta
}
#[getter]
pub fn source_str(&self) -> Option<String> {
self.0.source_str.clone()
}
#[getter]
pub fn block_type(&self) -> Option<String> {
self.0.block_type()
}
#[getter]
pub fn block_uuid(&self) -> Option<Uuid> {
self.0.block_uuid
}
#[getter]
pub fn cache_key(&self) -> String {
use std::hash::{Hash, Hasher};
use std::collections::hash_map::DefaultHasher;
let mut hasher = DefaultHasher::new();
self.0.token_type.hash(&mut hasher);
for t in &self.0.instance_types {
t.hash(&mut hasher);
}
format!("{:016x}", hasher.finish())
}
#[getter]
pub fn trim_start(&self) -> Option<Vec<String>> {
self.0.trim_start.clone()
}
#[getter]
pub fn trim_chars(&self) -> Option<Vec<String>> {
self.0.trim_chars.clone()
}
#[pyo3(signature = (raw_only = false))]
pub fn count_segments(&self, raw_only: Option<bool>) -> usize {
self.0.count_segments(raw_only.unwrap_or_default())
}
#[pyo3(signature = (*seg_type))]
pub fn is_type(&self, seg_type: &Bound<'_, PyTuple>) -> bool {
let seg_strs = seg_type
.extract::<Vec<String>>()
.expect("args should be all strings");
self.0
.is_type(&seg_strs.iter().map(String::as_str).collect::<Vec<&str>>())
}
#[getter]
pub fn indent_val(&self) -> i32 {
self.0.indent_value
}
#[getter]
pub fn is_whitespace(&self) -> bool {
self.0.is_whitespace
}
pub fn is_raw(&self) -> bool {
self.0.is_raw()
}
#[getter]
pub fn is_comment(&self) -> bool {
self.0.is_comment
}
#[getter]
pub fn class_types(&self) -> HashSet<String> {
self.0.class_types()
}
#[getter]
pub fn instance_types(&self) -> Vec<String> {
self.0.instance_types.clone()
}
#[getter]
pub fn preface_modifier(&self) -> String {
self.0.preface_modifier.clone()
}
#[getter]
pub fn source_fixes(&self) -> Vec<PySourceFix> {
self.0.source_fixes().into_iter().map(Into::into).collect()
}
#[getter]
pub fn _source_fixes(&self) -> Option<Vec<PySourceFix>> {
self.0
.source_fixes
.clone()
.map(|sf| sf.into_iter().map(Into::into).collect())
}
#[pyo3(signature = (*seg_type))]
pub fn class_is_type(&self, seg_type: &Bound<'_, PyTuple>) -> bool {
let seg_strs = seg_type
.extract::<Vec<String>>()
.expect("args should be all strings");
self.0
.class_is_type(&seg_strs.iter().map(String::as_str).collect::<Vec<&str>>())
}
#[getter]
pub fn first_non_whitespace_segment_raw_upper(&self) -> Option<String> {
self.0.first_non_whitespace_segment_raw_upper()
}
#[getter]
pub fn raw_upper(&self) -> String {
self.0.raw_upper()
}
pub fn invalidate_caches(&self) {}
#[getter]
pub fn uuid(&self) -> u128 {
self.0.uuid
}
#[getter]
pub fn descendant_type_set(&self) -> HashSet<String> {
self.0.descendant_type_set()
}
#[pyo3(signature = (*seg_type, recurse_into = true, no_recursive_seg_type = None, allow_self = true))]
pub fn recursive_crawl(
&self,
seg_type: &Bound<'_, PyTuple>,
recurse_into: bool,
no_recursive_seg_type: Option<Bound<'_, PyAny>>,
allow_self: bool,
) -> Vec<PyToken> {
let seg_type = seg_type
.extract::<Vec<String>>()
.expect("args should be all strings");
let temp: Option<Vec<String>> = match no_recursive_seg_type {
Some(py_any) => {
if let Ok(single_str) = py_any.extract::<String>() {
Some(vec![single_str]) // Convert single string into a Vec<String>
} else if let Ok(list_of_str) = py_any.extract::<Vec<String>>() {
Some(list_of_str) // Already a Vec<String>, return as is
} else {
Some(vec![]) // If it's neither, return an empty vector
}
}
None => None, // If None, apply no type filter
};
let no_recursive_seg_type: Option<Vec<&str>> = temp
.as_ref()
.map(|vec| vec.iter().map(String::as_str).collect());
self.0
.recursive_crawl(
&seg_type.iter().map(String::as_str).collect::<Vec<&str>>(),
recurse_into,
no_recursive_seg_type.as_deref(),
allow_self,
)
.into_iter()
.map(Into::into)
.collect()
}
pub fn recursive_crawl_all(&self, reverse: bool) -> Vec<PyToken> {
self.0
.recursive_crawl_all(reverse)
.map(|t| t.clone().into())
.collect()
}
#[getter]
pub fn segments(&self) -> Vec<PyToken> {
self.0
.segments
.clone()
.into_iter()
.map(Into::into)
.collect()
}
pub fn path_to(&self, other: PyToken) -> Vec<PyPathStep> {
self.0
.clone()
.path_to(other.into())
.into_iter()
.map(Into::into)
.collect()
}
pub fn get_start_loc(&self) -> (usize, usize) {
self.0.get_start_loc()
}
pub fn get_end_loc(&self) -> (usize, usize) {
self.0.get_end_loc()
}
#[getter]
pub fn raw_segments(&self) -> Vec<PyToken> {
self.0.raw_segments().into_iter().map(Into::into).collect()
}
pub fn _get_raw_segment_kwargs<'py>(&self, py: Python<'py>) -> Bound<'py, PyDict> {
let dict = PyDict::new(py);
if let Some(ref quoted_value) = self.0.quoted_value {
dict.set_item("quoted_value", quoted_value.clone()).unwrap();
} else {
dict.set_item("quoted_value", py.None()).unwrap();
}
if let Some(ref escape_replacement) = self.0.escape_replacement {
dict.set_item("escape_replacements", vec![escape_replacement])
.unwrap();
} else {
dict.set_item("escape_replacements", py.None()).unwrap();
}
dict
}
#[getter]
pub fn quoted_value(&self, py: Python<'_>) -> Option<(String, Py<PyAny>)> {
self.0.quoted_value.clone().map(|(s, g)| {
let py_group: Py<PyAny> = match g {
RegexModeGroup::Index(idx) => idx.into_pyobject(py).unwrap().into(),
RegexModeGroup::Name(name) => name.into_pyobject(py).unwrap().into(),
};
(s, py_group)
})
}
#[getter]
pub fn escape_replacements(&self) -> Option<Vec<(String, String)>> {
if self.0.escape_replacement.is_none() {
None
} else {
Some(vec![self.0.escape_replacement.clone().unwrap()])
}
}
pub fn set_parent(&self, parent: &Bound<'_, PyAny>, idx: usize) -> PyResult<()> {
let parent: Arc<Token> = parent
.extract()
.map(|t: PySqlFluffToken| Arc::new(t.0 .0))?;
let mut inner = self.0.clone();
inner.set_parent(parent, idx);
Ok(())
}
pub fn get_parent(&self) -> Option<(PyToken, i32)> {
None
}
pub fn iter_unparsables(&self) -> Vec<PyToken> {
self.0
.iter_unparseables()
.into_iter()
.map(Into::into)
.collect()
}
#[pyo3(signature = (ident=0, tabsize=4, code_only=false))]
pub fn stringify(
&self,
ident: Option<usize>,
tabsize: Option<usize>,
code_only: Option<bool>,
) -> String {
self.0.stringify(
ident.unwrap_or(0),
tabsize.unwrap_or(4),
code_only.unwrap_or_default(),
)
}
#[pyo3(signature = (code_only=None, show_raw=None, include_meta=None))]
pub fn to_tuple<'py>(
&self,
py: Python<'py>,
code_only: Option<bool>,
show_raw: Option<bool>,
include_meta: Option<bool>,
) -> Result<Bound<'py, PyTuple>, PyErr> {
PyTupleSerialisedSegment(self.0.to_tuple(code_only, show_raw, include_meta)).to_py_tuple(py)
}
// pub fn structural_simplify(&self) -> HashMap<String, Option<serde_json::Value>> {
// self.0
// .structural_simplify()
// .into_iter()
// .map(|(k, v)| (k, v.map(|v| serde_json::to_value(v).unwrap())))
// .collect()
// }
#[pyo3(signature = (segments=None, parent=None, parent_idx=None))]
pub fn copy(
&self,
segments: Option<Vec<PySqlFluffToken>>,
parent: Option<PySqlFluffToken>,
parent_idx: Option<usize>,
) -> PyToken {
PyToken(
self.0.copy(
segments.map(|s| s.into_iter().map(Into::into).collect()),
parent
.as_ref()
.map(|parent_token| Arc::clone(&parent_token.0 .0.clone().into())),
parent_idx,
),
)
}
#[pyo3(signature = (raw=None, source_fixes=None))]
pub fn edit(&self, raw: Option<String>, source_fixes: Option<Vec<PySourceFix>>) -> Self {
Self(self.0.edit(
raw,
source_fixes.map(|sf| sf.into_iter().map(Into::into).collect()),
))
}
#[classmethod]
pub fn position_segments<'py>(
_cls: &Bound<'py, PyType>,
py: Python<'py>,
segments: Vec<PySqlFluffToken>,
parent_pos: PySqlFluffPositionMarker,
) -> Result<Bound<'py, PyTuple>, PyErr> {
let tokens = Token::position_segments(
&segments
.into_iter()
.map(|s| s.into())
.collect::<Vec<Token>>(),
parent_pos.into(),
);
PyTuple::new(
py,
tokens.into_iter().map(Into::into).collect::<Vec<PyToken>>(),
)
}
pub fn __repr__(&self) -> String {
format!("{}", self)
}
}
impl Display for PyToken {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}
impl From<PyToken> for Token {
fn from(value: PyToken) -> Token {
value.0
}
}
impl From<Token> for PyToken {
fn from(value: Token) -> Self {
Self(value)
}
}
#[derive(IntoPyObject)]
pub struct PySqlFluffToken(pub PyToken);
impl<'py> FromPyObject<'py> for PySqlFluffToken {
fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
let raw = ob.getattr("raw")?.extract::<String>()?;
let class_types = ob
.getattr("_class_types")
.unwrap_or(ob.getattr("class_types")?)
.extract::<HashSet<String>>()?
.into_iter()
.map(|s| s.to_string())
.collect::<HashSet<String>>();
let instance_types = ob
.getattr("instance_types")?
.extract::<Vec<String>>()?
.into_iter()
.map(|s| s.to_string())
.collect::<Vec<String>>();
let segments = ob
.getattr("segments")?
.extract::<Vec<PySqlFluffToken>>()
.map(|s| s.into_iter().map(Into::into).collect::<Vec<Token>>())?;
let pos_marker = ob
.getattr("pos_marker")?
.extract::<PySqlFluffPositionMarker>()?;
use crate::token::config::TokenConfig;
Ok(Self(PyToken(Token::base_token(
raw,
pos_marker.into(),
TokenConfig {
class_types,
instance_types,
..TokenConfig::default()
},
segments,
))))
}
}
impl From<PySqlFluffToken> for Token {
fn from(value: PySqlFluffToken) -> Token {
value.0 .0
}
}
impl From<Token> for PySqlFluffToken {
fn from(value: Token) -> Self {
Self(PyToken(value))
}
}

View File

@@ -17,6 +17,12 @@ if TYPE_CHECKING: # pragma: no cover
from sqlfluff.core.parser import BaseSegment, PositionMarker
from sqlfluff.core.rules import BaseRule, LintFix
try:
from sqlfluffrs import RsSQLLexerError
except ImportError:
...
CheckTuple = tuple[str, int, int]
SerializedObject = dict[str, Union[str, int, bool, list["SerializedObject"]]]
@@ -181,6 +187,18 @@ class SQLLexError(SQLBaseError):
_code = "LXR"
_identifier = "lexing"
@classmethod
def from_rs_error(cls, rs_error: "RsSQLLexerError") -> "SQLLexError":
"""Create a SQLLexError from a RsSQLLexerError."""
return cls(
description=rs_error.desc,
line_no=rs_error.line_no,
line_pos=rs_error.line_pos,
ignore=rs_error.ignore,
fatal=rs_error.fatal,
warning=rs_error.warning,
)
class SQLParseError(SQLBaseError):
"""An error which occurred during parsing.

View File

@@ -51,7 +51,7 @@ class ParsedVariant(NamedTuple):
lexing_violations (:obj:`list` of :obj:`SQLLexError`): Any violations
raised during the lexing phase.
parsing_violations (:obj:`list` of :obj:`SQLParseError`): Any violations
raised during the lexing phase.
raised during the parsing phase.
"""
templated_file: TemplatedFile

View File

@@ -14,7 +14,13 @@ from sqlfluff.core.parser.grammar import (
Ref,
Sequence,
)
from sqlfluff.core.parser.lexer import Lexer, RegexLexer, StringLexer
from sqlfluff.core.parser.lexer import (
LexerType,
PyLexer,
RegexLexer,
StringLexer,
get_lexer_class,
)
from sqlfluff.core.parser.markers import PositionMarker
from sqlfluff.core.parser.matchable import Matchable
from sqlfluff.core.parser.parser import Parser
@@ -52,6 +58,9 @@ from sqlfluff.core.parser.segments import (
)
from sqlfluff.core.parser.types import ParseMode
# Get the appropriate lexer class (PyRsLexer if available, otherwise PyLexer)
Lexer = get_lexer_class()
__all__ = (
"BaseSegment",
"SourceFix",
@@ -95,6 +104,8 @@ __all__ = (
"RegexParser",
"PositionMarker",
"Lexer",
"PyLexer",
"LexerType",
"StringLexer",
"RegexLexer",
"Parser",

View File

@@ -15,7 +15,9 @@ from sqlfluff.core.parser.segments import (
BaseSegment,
Dedent,
EndOfFile,
ImplicitIndent,
Indent,
LiteralKeywordSegment,
MetaSegment,
RawSegment,
TemplateLoop,
@@ -723,7 +725,7 @@ def _iter_segments(
)
class Lexer:
class PyLexer:
"""The Lexer class actually does the lexing step."""
def __init__(
@@ -825,7 +827,9 @@ class Lexer:
return tuple(segment_buffer)
@staticmethod
def violations_from_segments(segments: tuple[RawSegment, ...]) -> list[SQLLexError]:
def violations_from_segments(
segments: tuple[RawSegment, ...],
) -> list[SQLLexError]:
"""Generate any lexing errors for any unlexables."""
violations = []
for segment in segments:
@@ -887,3 +891,87 @@ class Lexer:
f"{template.templated_str[template_slice]!r}"
)
return templated_buff
try:
from sqlfluffrs import RsLexer, RsToken
def get_segment_type_map(base_class: type) -> dict[str, type[RawSegment]]:
"""Dynamically create a map of segment types to their subclasses."""
segment_map = {}
for subclass in base_class.__subclasses__():
if subclass is LiteralKeywordSegment or subclass is ImplicitIndent:
continue
if (
hasattr(subclass, "type") and subclass.type
): # Ensure the subclass has a type
segment_map[subclass.type] = subclass
# Recursively add subclasses of subclasses
segment_map.update(get_segment_type_map(subclass))
return segment_map
# Dynamically generate the segment_types map
segment_types = get_segment_type_map(RawSegment)
class PyRsLexer(RsLexer):
"""A wrapper around the sqlfluffrs lexer."""
@staticmethod
def _tokens_to_segments(
tokens: list["RsToken"], py_template: TemplatedFile
) -> tuple[BaseSegment, ...]:
"""Convert tokens to segments."""
return tuple(
segment_types.get(token.type, RawSegment).from_rstoken(
token, py_template
)
for token in tokens
)
def lex(
self, raw: Union[str, TemplatedFile]
) -> tuple[tuple[BaseSegment, ...], list[SQLLexError]]:
"""Take a string or TemplatedFile and return segments."""
tokens, errors = self._lex(raw)
first_token = tokens[0]
assert first_token
template = first_token.pos_marker.templated_file
py_template = TemplatedFile(
template.source_str,
template.fname,
template.templated_str,
template.sliced_file, # type: ignore
template.raw_sliced, # type: ignore
)
return (
self._tokens_to_segments(tokens, py_template),
[SQLLexError.from_rs_error(error) for error in errors],
)
_HAS_RUST_LEXER = True
lexer_logger.info("Using sqlfluffrs lexer.")
except ImportError:
PyRsLexer = None # type: ignore[assignment, misc]
_HAS_RUST_LEXER = False
lexer_logger.info("sqlfluffrs lexer not present or failed to load.")
def get_lexer_class() -> type[Union[PyLexer, "PyRsLexer"]]:
"""Get the appropriate lexer class based on availability.
Returns PyRsLexer if the Rust extension is available,
otherwise returns PyLexer.
This function provides a single point of lexer selection,
making it easy to instantiate the correct lexer:
Lexer = get_lexer_class()
lexer = Lexer(config=config)
Returns:
The lexer class to use (PyRsLexer or PyLexer).
"""
if _HAS_RUST_LEXER:
return PyRsLexer
return PyLexer

View File

@@ -3,13 +3,15 @@
This class is a construct to keep track of positions within a file.
"""
from collections.abc import Sequence
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Optional
from sqlfluff.core.helpers.slice import zero_slice
if TYPE_CHECKING:
from sqlfluff.core.templaters import TemplatedFile # pragma: no cover
if TYPE_CHECKING: # pragma: no cover
from sqlfluff.core.templaters import TemplatedFile
from sqlfluffrs import RsPositionMarker
@dataclass(frozen=True)
@@ -124,7 +126,7 @@ class PositionMarker:
@classmethod
def from_child_markers(
cls, *markers: Optional["PositionMarker"]
cls, markers: Sequence[Optional["PositionMarker"]]
) -> "PositionMarker":
"""Create a parent marker from it's children."""
source_slice = slice(
@@ -249,3 +251,16 @@ class PositionMarker:
def to_source_dict(self) -> dict[str, int]:
"""Serialise the source position."""
return self.templated_file.source_position_dict_from_slice(self.source_slice)
@classmethod
def from_rs_position_marker(
cls,
rs_position_marker: "RsPositionMarker",
templated_file: "TemplatedFile",
) -> "PositionMarker":
"""Create a PositionMarker from an RsPositionMarker."""
return cls(
source_slice=rs_position_marker.source_slice,
templated_slice=rs_position_marker.templated_slice,
templated_file=templated_file,
)

View File

@@ -195,7 +195,7 @@ class BaseSegment(metaclass=SegmentMetaclass):
# If no pos given, work it out from the children.
if all(seg.pos_marker for seg in segments):
pos_marker = PositionMarker.from_child_markers(
*(seg.pos_marker for seg in segments)
[seg.pos_marker for seg in segments]
)
assert not hasattr(self, "parse_grammar"), "parse_grammar is deprecated."

View File

@@ -1,7 +1,7 @@
"""Indent and Dedent classes."""
from collections.abc import Sequence
from typing import Optional
from typing import TYPE_CHECKING, Optional
from uuid import UUID
from sqlfluff.core.parser.context import ParseContext
@@ -11,6 +11,9 @@ from sqlfluff.core.parser.segments.base import BaseSegment
from sqlfluff.core.parser.segments.raw import RawSegment, SourceFix
from sqlfluff.core.templaters.base import TemplatedFile
if TYPE_CHECKING: # pragma: no cover
from sqlfluffrs import RsToken
class MetaSegment(RawSegment):
"""A segment which is empty but indicates where something should be."""
@@ -80,6 +83,19 @@ class MetaSegment(RawSegment):
"""
return None
@classmethod
def from_rstoken(
cls,
token: "RsToken",
tf: "TemplatedFile",
) -> "MetaSegment":
"""Create a RawSegment from an RSQL token."""
segment = cls(
pos_marker=PositionMarker.from_rs_position_marker(token.pos_marker, tf),
block_uuid=token.block_uuid,
)
return segment
class EndOfFile(MetaSegment):
"""A meta segment to indicate the end of the file."""
@@ -270,3 +286,14 @@ class TemplateSegment(MetaSegment):
source_fixes=sf,
block_uuid=self.block_uuid,
)
@classmethod
def from_rstoken(cls, token: "RsToken", tf: TemplatedFile) -> "TemplateSegment":
"""Create a TemplateSegment from a token."""
segment = cls(
pos_marker=PositionMarker.from_rs_position_marker(token.pos_marker, tf),
source_str=token.source_str,
block_type=token.block_type,
block_uuid=token.block_uuid,
)
return segment

View File

@@ -4,7 +4,7 @@ This is designed to be the root segment, without
any children, and the output of the lexer.
"""
from typing import Any, Callable, Optional, Union, cast
from typing import TYPE_CHECKING, Any, Callable, Optional, Union, cast
from uuid import uuid4
import regex as re
@@ -12,6 +12,10 @@ import regex as re
from sqlfluff.core.parser.markers import PositionMarker
from sqlfluff.core.parser.segments.base import BaseSegment, SourceFix
if TYPE_CHECKING: # pragma: no cover
from sqlfluff.core.templaters import TemplatedFile
from sqlfluffrs import RsToken
class RawSegment(BaseSegment):
"""This is a segment without any subsegments."""
@@ -299,6 +303,26 @@ class RawSegment(BaseSegment):
**new_segment_kwargs,
)
@classmethod
def from_rstoken(
cls,
token: "RsToken",
tf: "TemplatedFile",
) -> "RawSegment":
"""Create a RawSegment from an RSQL token."""
segment = cls(
raw=token.raw,
pos_marker=PositionMarker.from_rs_position_marker(token.pos_marker, tf),
instance_types=tuple(token.instance_types),
trim_start=token.trim_start,
trim_chars=token.trim_chars,
source_fixes=token.source_fixes,
uuid=token.uuid,
quoted_value=token.quoted_value,
escape_replacements=token.escape_replacements,
)
return segment
__all__ = [
"PositionMarker",

Some files were not shown because too many files have changed in this diff.