Write an implementation of the lexer in Rust (#7132)
Co-authored-by: Alan Cruickshank <alanmcruickshank@gmail.com>
.github/workflows/ci-test-python.yml (vendored, 19 lines changed)
@@ -25,12 +25,15 @@ on:
         required: false
         type: boolean
         default: false
+      with-rust:
+        required: true
+        type: string
     secrets:
       gh_token:
         required: true

 concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.python-version }}-${{ inputs.marks }}-${{ inputs.coverage }}
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ inputs.python-version }}-${{ inputs.marks }}-${{ inputs.coverage }}-${{ inputs.with-rust }}
   cancel-in-progress: true

 jobs:
@@ -49,6 +52,14 @@ jobs:
             setup.cfg
             requirements_dev.txt

+      - name: Download built wheels
+        if: ${{ inputs.with-rust == '-rust' }}
+        uses: actions/download-artifact@v4
+        with:
+          path: ./dist
+          pattern: wheels-*
+          merge-multiple: true
+
       - name: Install dependencies
         run: pip install tox

@@ -69,10 +80,10 @@ jobs:
         # NOTE: We have a separate job for coverage reporting because
         # it impacts performance and slows the test suite significantly.
         if: ${{ inputs.coverage }}
-        run: tox -e py${{ steps.py_version.outputs.PYVERSION }} -- --cov=sqlfluff -n 2 test -m "${{ inputs.marks }}" --durations=16 --verbosity=0
+        run: tox -e py${{ steps.py_version.outputs.PYVERSION }}${{ inputs.with-rust }} -- --cov=sqlfluff -n 2 test -m "${{ inputs.marks }}" --durations=16 --verbosity=0
       - name: Run the tests (without coverage)
         if: ${{ !inputs.coverage }}
-        run: tox -e py${{ steps.py_version.outputs.PYVERSION }} -- -n 2 test -m "${{ inputs.marks }}" --durations=16 --verbosity=0
+        run: tox -e py${{ steps.py_version.outputs.PYVERSION }}${{ inputs.with-rust }} -- -n 2 test -m "${{ inputs.marks }}" --durations=16 --verbosity=0

       - name: Rename coverage files with suffix
         # NOTE: We do this because we're using the same tox environment for multiple
@@ -88,7 +99,7 @@ jobs:
         uses: actions/upload-artifact@v4
         if: ${{ inputs.coverage }}
         with:
-          name: coverage-data-py${{ inputs.python-version }}-${{ inputs.marks }}
+          name: coverage-data-py${{ inputs.python-version }}-${{ inputs.marks }}${{ inputs.with-rust }}
           path: ".coverage.*"
           if-no-files-found: ignore
           include-hidden-files: true

.github/workflows/ci-tests.yml (vendored, 144 lines changed)
@@ -45,6 +45,7 @@ jobs:
           "mypy",
           "mypyc",
           "doctests",
+          "check-rs",
         ]
       include:
         # Default to most recent python version
@@ -64,9 +65,144 @@ jobs:
       - name: Run the tests
         run: tox -e ${{ matrix.job }}

+  rs-build-linux:
+    runs-on: ${{ matrix.platform.runner }}
+    strategy:
+      matrix:
+        platform:
+          - runner: ubuntu-latest
+            target: x86_64
+          - runner: ubuntu-latest
+            target: x86
+          - runner: ubuntu-latest
+            target: aarch64
+          - runner: ubuntu-latest
+            target: armv7
+          - runner: ubuntu-latest
+            target: s390x
+          - runner: ubuntu-latest
+            target: ppc64le
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.x
+      - name: Build wheels
+        uses: PyO3/maturin-action@v1
+        with:
+          target: ${{ matrix.platform.target }}
+          args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
+          sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
+          manylinux: auto
+      - name: Upload wheels
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-linux-${{ matrix.platform.target }}
+          path: dist
+
+  rs-build-musllinux:
+    runs-on: ${{ matrix.platform.runner }}
+    strategy:
+      matrix:
+        platform:
+          - runner: ubuntu-latest
+            target: x86_64
+          - runner: ubuntu-latest
+            target: x86
+          - runner: ubuntu-latest
+            target: aarch64
+          - runner: ubuntu-latest
+            target: armv7
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.x
+      - name: Build wheels
+        uses: PyO3/maturin-action@v1
+        with:
+          target: ${{ matrix.platform.target }}
+          args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
+          sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
+          manylinux: musllinux_1_2
+      - name: Upload wheels
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-musllinux-${{ matrix.platform.target }}
+          path: dist
+
+  rs-build-windows:
+    runs-on: ${{ matrix.platform.runner }}
+    strategy:
+      matrix:
+        platform:
+          - runner: windows-latest
+            target: x64
+          - runner: windows-latest
+            target: x86
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.x
+          architecture: ${{ matrix.platform.target }}
+      - name: Build wheels
+        uses: PyO3/maturin-action@v1
+        with:
+          target: ${{ matrix.platform.target }}
+          args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
+          sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
+      - name: Upload wheels
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-windows-${{ matrix.platform.target }}
+          path: dist
+
+  rs-build-macos:
+    runs-on: ${{ matrix.platform.runner }}
+    strategy:
+      matrix:
+        platform:
+          - runner: macos-13
+            target: x86_64
+          - runner: macos-14
+            target: aarch64
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: 3.x
+      - name: Build wheels
+        uses: PyO3/maturin-action@v1
+        with:
+          target: ${{ matrix.platform.target }}
+          args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
+          sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
+      - name: Upload wheels
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-macos-${{ matrix.platform.target }}
+          path: dist
+
+  rs-build-sdist:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Build sdist
+        uses: PyO3/maturin-action@v1
+        with:
+          command: sdist
+          args: --out dist --manifest-path sqlfluffrs/Cargo.toml
+      - name: Upload sdist
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheels-sdist
+          path: dist
+
   # Test with coverage tracking on most recent python (py313).
   python-version-tests:
     name: Python Tests
+    needs: rs-build-linux
     strategy:
       matrix:
         python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
@@ -77,6 +213,7 @@ jobs:
         # Override coverage to be true for most recent python version.
         - python-version: "3.13"
           coverage: true
+        with-rust: [ "-rust", "" ]
     permissions:
       contents: read
       pull-requests: write
@@ -84,6 +221,7 @@ jobs:
     with:
       python-version: ${{ matrix.python-version }}
       coverage: ${{ matrix.coverage }}
+      with-rust: ${{ matrix.with-rust }}
     secrets:
       gh_token: ${{ secrets.github_token }}

@@ -114,9 +252,12 @@ jobs:
       gh_token: ${{ secrets.github_token }}

   dialect-tests:
-    name: Dialect ${{ matrix.marks }}
+    name: Dialect ${{ matrix.marks }}${{ matrix.with-rust }}
+    needs: rs-build-linux
     strategy:
       matrix:
         marks: [ "parse_suite", "fix_suite", "rules_suite" ]
+        with-rust: [ "-rust", "" ]
         include:
           # This runs the bulk of the dialect _parsing_ tests.
           #
@@ -149,6 +290,7 @@ jobs:
       python-version: "3.13"
       marks: ${{ matrix.marks }}
       coverage: ${{ matrix.coverage }}
+      with-rust: ${{ matrix.with-rust }}
     secrets:
       gh_token: ${{ secrets.github_token }}

.github/workflows/publish-sqlfluffrs-release-to-pypi.yaml (vendored, new file, 172 lines)
@@ -0,0 +1,172 @@
name: Publish SQLFluff-rs PyPI Version

on:
  release:
    types:
      - published
  workflow_dispatch:

permissions:
  contents: read

jobs:
  linux:
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
        platform:
          - runner: ubuntu-22.04
            target: x86_64
          - runner: ubuntu-22.04
            target: x86
          - runner: ubuntu-22.04
            target: aarch64
          - runner: ubuntu-22.04
            target: armv7
          - runner: ubuntu-22.04
            target: s390x
          - runner: ubuntu-22.04
            target: ppc64le
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.platform.target }}
          args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
          sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
          manylinux: auto
      - name: Upload wheels
        uses: actions/upload-artifact@v4
        with:
          name: wheels-linux-${{ matrix.platform.target }}
          path: dist

  musllinux:
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
        platform:
          - runner: ubuntu-22.04
            target: x86_64
          - runner: ubuntu-22.04
            target: x86
          - runner: ubuntu-22.04
            target: aarch64
          - runner: ubuntu-22.04
            target: armv7
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.platform.target }}
          args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
          sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
          manylinux: musllinux_1_2
      - name: Upload wheels
        uses: actions/upload-artifact@v4
        with:
          name: wheels-musllinux-${{ matrix.platform.target }}
          path: dist

  windows:
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
        platform:
          - runner: windows-latest
            target: x64
          - runner: windows-latest
            target: x86
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
          architecture: ${{ matrix.platform.target }}
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.platform.target }}
          args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
          sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
      - name: Upload wheels
        uses: actions/upload-artifact@v4
        with:
          name: wheels-windows-${{ matrix.platform.target }}
          path: dist

  macos:
    runs-on: ${{ matrix.platform.runner }}
    strategy:
      matrix:
        platform:
          - runner: macos-13
            target: x86_64
          - runner: macos-14
            target: aarch64
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: 3.x
      - name: Build wheels
        uses: PyO3/maturin-action@v1
        with:
          target: ${{ matrix.platform.target }}
          args: --release --out dist --find-interpreter --manifest-path sqlfluffrs/Cargo.toml
          sccache: ${{ !startsWith(github.ref, 'refs/tags/') }}
      - name: Upload wheels
        uses: actions/upload-artifact@v4
        with:
          name: wheels-macos-${{ matrix.platform.target }}
          path: dist

  sdist:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Build sdist
        uses: PyO3/maturin-action@v1
        with:
          command: sdist
          args: --out dist --manifest-path sqlfluffrs/Cargo.toml
      - name: Upload sdist
        uses: actions/upload-artifact@v4
        with:
          name: wheels-sdist
          path: dist

  release:
    name: Release
    runs-on: ubuntu-latest
    if: ${{ startsWith(github.ref, 'refs/tags/') || github.event_name == 'workflow_dispatch' }}
    needs: [linux, musllinux, windows, macos, sdist]
    permissions:
      # Use to sign the release artifacts
      id-token: write
      # Used to upload release artifacts
      contents: write
      # Used to generate artifact attestation
      attestations: write
    steps:
      - uses: actions/download-artifact@v4
      - name: Generate artifact attestation
        uses: actions/attest-build-provenance@v2
        with:
          subject-path: 'wheels-*/*'
      - name: Publish to PyPI
        if: ${{ startsWith(github.ref, 'refs/tags/') }}
        uses: PyO3/maturin-action@v1
        env:
          MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
        with:
          command: upload
          args: --non-interactive --skip-existing wheels-*/*

.pre-commit-config.yaml (modified)
@@ -36,7 +36,7 @@ repos:
     hooks:
       - id: black
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.14.1
+    rev: v1.18.1
     hooks:
       - id: mypy
         additional_dependencies:
@@ -58,7 +58,7 @@ repos:
           pathspec,
           pytest, # and by extension... pluggy
           click,
-          platformdirs
+          platformdirs,
         ]
         files: ^src/sqlfluff/.*
         # The mypy pre-commit hook by default sets a few arguments that we don't normally

@@ -224,6 +224,10 @@ for development, and which parts of the test suite you may find most useful.
    runs to specific dialects to further improve iteration speed. e.g.
    - `tox -e generate-fixture-yml -- -d mysql` will run just the mysql tests.
    - `python test/generate_parse_fixture_yml.py -d mysql` will do the same.
+   As you make changes to a dialect, you will also need to regenerate the Rust
+   dialects to keep them in sync. To do this, run `tox -e generate-rs` (if using
+   tox), or, with sqlfluff installed in a virtual environment, run
+   `utils/rustify.py build` to resync the languages.
 2. Developing for the dbt templater should only require running the dbt test
    suite (see below).
 3. Developing rules and rule plugins there are a couple of scenarios.

pyproject.toml (modified)
@@ -7,12 +7,10 @@ build-backend = "setuptools.build_meta"
 name = "sqlfluff"
 version = "3.5.0"
 description = "The SQL Linter for Humans"
-readme = {file = "README.md", content-type = "text/markdown"}
+readme = { file = "README.md", content-type = "text/markdown" }
 requires-python = ">=3.9"
-authors = [
-    {name = "Alan Cruickshank", email = "alan@designingoverload.com"},
-]
-license = {file = "LICENSE.md"}
+authors = [{ name = "Alan Cruickshank", email = "alan@designingoverload.com" }]
+license = { file = "LICENSE.md" }
 classifiers = [
     "Development Status :: 5 - Production/Stable",
     "Environment :: Console",
@@ -99,6 +97,9 @@ dependencies = [
     "tqdm",
 ]

+[project.optional-dependencies]
+rs = ["sqlfluffrs~=0.1.0"]
+
 [project.urls]
 Homepage = "https://www.sqlfluff.com"
 Documentation = "https://docs.sqlfluff.com"
@@ -148,9 +149,7 @@ root_package = "sqlfluff"
 [[tool.importlinter.contracts]]
 name = "Forbid dependencies outside core"
 type = "forbidden"
-source_modules = [
-    "sqlfluff.core",
-]
+source_modules = ["sqlfluff.core"]
 forbidden_modules = [
     "sqlfluff.api",
     "sqlfluff.cli",
@@ -162,12 +161,8 @@ forbidden_modules = [
 [[tool.importlinter.contracts]]
 name = "API may not depend on CLI"
 type = "forbidden"
-source_modules = [
-    "sqlfluff.api",
-]
-forbidden_modules = [
-    "sqlfluff.cli",
-]
+source_modules = ["sqlfluff.api"]
+forbidden_modules = ["sqlfluff.cli"]

 [[tool.importlinter.contracts]]
 name = "Helper methods must be internally independent"
@@ -222,6 +217,7 @@ warn_unused_ignores = true
 strict_equality = true
 extra_checks = true
 no_implicit_reexport = true
+mypy_path = "$MYPY_CONFIG_FILE_DIR/sqlfluffrs"

 # skip type checking for 3rd party packages for which stubs are not available
 [[tool.mypy.overrides]]
@@ -232,7 +228,6 @@ ignore_missing_imports = true
 module = "tblib.*"
 ignore_missing_imports = true

-
 [tool.ruff.lint]
 extend-select = ["I", "D"]

@@ -280,7 +275,7 @@ ignore-path = "docs/source/_partials/"
 skip = "*/test/fixtures/*,*/.*,*/pyproject.toml"

 check-hidden = true
-quiet-level=2
+quiet-level = 2
 # ignore-regex = '\\[fnrstv]'
 builtin = "clear,rare,informal,names"

@@ -288,7 +283,7 @@ ignore-words-list = "fo,ws,falsy,coo,inout,deque,crate,trough,ro,mange,identifer

 # ignore-words = "dev/tools/codespell/codespell-ignore.txt"
 # exclude-file = "dev/tools/codespell/codespell-lines-ignore.txt"
-uri-ignore-words-list="crate"
+uri-ignore-words-list = "crate"

 # For future reference: it is not currently possible to specify
 # the standard dictionary and the custom dictionary in the configuration

sqlfluffrs/.gitignore (vendored, new file, 72 lines)
@@ -0,0 +1,72 @@
/target

# Byte-compiled / optimized / DLL files
__pycache__/
.pytest_cache/
*.py[cod]

# C extensions
*.so

# Distribution / packaging
.Python
.venv/
env/
bin/
build/
develop-eggs/
dist/
eggs/
lib/
lib64/
parts/
sdist/
var/
include/
man/
venv/
*.egg-info/
.installed.cfg
*.egg

# Installer logs
pip-log.txt
pip-delete-this-directory.txt
pip-selfcheck.json

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.cache
nosetests.xml
coverage.xml

# Translations
*.mo

# Mr Developer
.mr.developer.cfg
.project
.pydevproject

# Rope
.ropeproject

# Django stuff:
*.log
*.pot

.DS_Store

# Sphinx documentation
docs/_build/

# PyCharm
.idea/

# VSCode
.vscode/

# Pyenv
.python-version
sqlfluffrs/Cargo.lock (generated, new file, 731 lines)
@@ -0,0 +1,731 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4

[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
 "memchr",
]

[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"

[[package]]
name = "anstream"
version = "0.6.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
dependencies = [
 "anstyle",
 "anstyle-parse",
 "anstyle-query",
 "anstyle-wincon",
 "colorchoice",
 "is_terminal_polyfill",
 "utf8parse",
]

[[package]]
name = "anstyle"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"

[[package]]
name = "anstyle-parse"
version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
dependencies = [
 "utf8parse",
]

[[package]]
name = "anstyle-query"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
dependencies = [
 "windows-sys",
]

[[package]]
name = "anstyle-wincon"
version = "3.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e"
dependencies = [
 "anstyle",
 "once_cell",
 "windows-sys",
]

[[package]]
name = "arc-swap"
version = "1.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457"

[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"

[[package]]
name = "bincode"
version = "1.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
dependencies = [
 "serde",
]

[[package]]
name = "bit-set"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
dependencies = [
 "bit-vec",
]

[[package]]
name = "bit-vec"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"

[[package]]
name = "bumpalo"
version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"

[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"

[[package]]
name = "colorchoice"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"

[[package]]
name = "either"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"

[[package]]
name = "env_filter"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "186e05a59d4c50738528153b83b0b0194d3a29507dfec16eccd4b342903397d0"
dependencies = [
 "log",
 "regex",
]

[[package]]
name = "env_logger"
version = "0.11.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f"
dependencies = [
 "anstream",
 "anstyle",
 "env_filter",
 "jiff",
 "log",
]

[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"

[[package]]
name = "fancy-regex"
version = "0.16.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "998b056554fbe42e03ae0e152895cd1a7e1002aec800fdc6635d20270260c46f"
dependencies = [
 "bit-set",
 "regex-automata",
 "regex-syntax",
]

[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"

[[package]]
name = "getrandom"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
dependencies = [
 "cfg-if",
 "libc",
 "r-efi",
 "wasi",
]

[[package]]
name = "hashbrown"
version = "0.15.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
dependencies = [
 "allocator-api2",
 "equivalent",
 "foldhash",
]

[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"

[[package]]
name = "indoc"
version = "2.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"

[[package]]
name = "is_terminal_polyfill"
version = "1.70.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf"

[[package]]
name = "itertools"
version = "0.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
dependencies = [
 "either",
]

[[package]]
name = "itoa"
version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"

[[package]]
name = "jiff"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49"
dependencies = [
 "jiff-static",
 "log",
 "portable-atomic",
 "portable-atomic-util",
 "serde",
]

[[package]]
name = "jiff-static"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "js-sys"
version = "0.3.80"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "852f13bec5eba4ba9afbeb93fd7c13fe56147f055939ae21c43a29a0ecb2702e"
dependencies = [
 "once_cell",
 "wasm-bindgen",
]

[[package]]
name = "libc"
version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"

[[package]]
name = "log"
version = "0.4.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432"

[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"

[[package]]
name = "memoffset"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
dependencies = [
 "autocfg",
]

[[package]]
name = "once_cell"
version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"

[[package]]
name = "portable-atomic"
version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"

[[package]]
name = "portable-atomic-util"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507"
dependencies = [
 "portable-atomic",
]

[[package]]
name = "proc-macro2"
version = "1.0.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
dependencies = [
 "unicode-ident",
]

[[package]]
name = "pyo3"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ba0117f4212101ee6544044dae45abe1083d30ce7b29c4b5cbdfa2354e07383"
dependencies = [
 "hashbrown",
 "indoc",
 "libc",
 "memoffset",
 "once_cell",
 "portable-atomic",
 "pyo3-build-config",
 "pyo3-ffi",
 "pyo3-macros",
 "unindent",
 "uuid",
]

[[package]]
name = "pyo3-build-config"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4fc6ddaf24947d12a9aa31ac65431fb1b851b8f4365426e182901eabfb87df5f"
dependencies = [
 "target-lexicon",
]

[[package]]
name = "pyo3-ffi"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "025474d3928738efb38ac36d4744a74a400c901c7596199e20e45d98eb194105"
dependencies = [
 "libc",
 "pyo3-build-config",
]

[[package]]
name = "pyo3-log"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "833e6fdc21553e9938d9443050ed3c7787ac3c1a1aefccbd03dfae0c7a4be529"
dependencies = [
 "arc-swap",
 "log",
 "pyo3",
]

[[package]]
name = "pyo3-macros"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e64eb489f22fe1c95911b77c44cc41e7c19f3082fc81cce90f657cdc42ffded"
dependencies = [
 "proc-macro2",
 "pyo3-macros-backend",
 "quote",
 "syn",
]

[[package]]
name = "pyo3-macros-backend"
version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "100246c0ecf400b475341b8455a9213344569af29a3c841d29270e53102e0fcf"
dependencies = [
 "heck",
 "proc-macro2",
 "pyo3-build-config",
 "quote",
 "syn",
]

[[package]]
name = "quote"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
dependencies = [
 "proc-macro2",
]

[[package]]
name = "r-efi"
version = "5.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"

[[package]]
name = "regex"
version = "1.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912"
dependencies = [
 "aho-corasick",
 "memchr",
 "regex-automata",
 "regex-syntax",
]

[[package]]
name = "regex-automata"
version = "0.4.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
dependencies = [
 "aho-corasick",
 "memchr",
 "regex-syntax",
]

[[package]]
name = "regex-syntax"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"

[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"

[[package]]
name = "ryu"
version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"

[[package]]
name = "serde"
version = "1.0.225"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd6c24dee235d0da097043389623fb913daddf92c76e9f5a1db88607a0bcbd1d"
dependencies = [
 "serde_core",
 "serde_derive",
]

[[package]]
name = "serde_core"
version = "1.0.225"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "659356f9a0cb1e529b24c01e43ad2bdf520ec4ceaf83047b83ddcc2251f96383"
dependencies = [
 "serde_derive",
]

[[package]]
name = "serde_derive"
version = "1.0.225"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ea936adf78b1f766949a4977b91d2f5595825bd6ec079aa9543ad2685fc4516"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "serde_json"
version = "1.0.145"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
dependencies = [
 "itoa",
 "memchr",
 "ryu",
 "serde",
 "serde_core",
]

[[package]]
name = "slotmap"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbff4acf519f630b3a3ddcfaea6c06b42174d9a44bc70c620e9ed1649d58b82a"
dependencies = [
 "version_check",
]

[[package]]
name = "sqlfluffrs"
version = "0.1.0"
dependencies = [
 "bincode",
 "env_logger",
 "fancy-regex",
 "hashbrown",
 "itertools",
 "log",
 "once_cell",
 "pyo3",
 "pyo3-log",
 "regex",
 "serde",
 "serde_json",
 "slotmap",
 "uuid",
]

[[package]]
name = "syn"
version = "2.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6"
dependencies = [
 "proc-macro2",
 "quote",
 "unicode-ident",
]

[[package]]
name = "target-lexicon"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a"

[[package]]
name = "unicode-ident"
version = "1.0.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"

[[package]]
name = "unindent"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce"

[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"

[[package]]
name = "uuid"
version = "1.18.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2"
dependencies = [
 "getrandom",
 "js-sys",
 "wasm-bindgen",
]

[[package]]
name = "version_check"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"

[[package]]
name = "wasi"
version = "0.14.7+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "883478de20367e224c0090af9cf5f9fa85bed63a95c1abf3afc5c083ebc06e8c"
dependencies = [
 "wasip2",
]

[[package]]
name = "wasip2"
version = "1.0.1+wasi-0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
dependencies = [
 "wit-bindgen",
]

[[package]]
name = "wasm-bindgen"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab10a69fbd0a177f5f649ad4d8d3305499c42bab9aef2f7ff592d0ec8f833819"
dependencies = [
 "cfg-if",
 "once_cell",
 "rustversion",
 "wasm-bindgen-macro",
 "wasm-bindgen-shared",
]

[[package]]
name = "wasm-bindgen-backend"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bb702423545a6007bbc368fde243ba47ca275e549c8a28617f56f6ba53b1d1c"
dependencies = [
 "bumpalo",
 "log",
 "proc-macro2",
 "quote",
 "syn",
 "wasm-bindgen-shared",
]

[[package]]
name = "wasm-bindgen-macro"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fc65f4f411d91494355917b605e1480033152658d71f722a90647f56a70c88a0"
dependencies = [
 "quote",
 "wasm-bindgen-macro-support",
]

[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffc003a991398a8ee604a401e194b6b3a39677b3173d6e74495eb51b82e99a32"
dependencies = [
 "proc-macro2",
 "quote",
 "syn",
 "wasm-bindgen-backend",
 "wasm-bindgen-shared",
]

[[package]]
name = "wasm-bindgen-shared"
version = "0.2.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "293c37f4efa430ca14db3721dfbe48d8c33308096bd44d80ebaa775ab71ba1cf"
dependencies = [
 "unicode-ident",
]

[[package]]
name = "windows-sys"
version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
dependencies = [
 "windows-targets",
]

[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
 "windows_aarch64_gnullvm",
 "windows_aarch64_msvc",
 "windows_i686_gnu",
 "windows_i686_gnullvm",
 "windows_i686_msvc",
 "windows_x86_64_gnu",
 "windows_x86_64_gnullvm",
 "windows_x86_64_msvc",
]

[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"

[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"

[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"

[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"

[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"

[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"

[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"

[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

[[package]]
name = "wit-bindgen"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
sqlfluffrs/Cargo.toml (new file, 32 lines)
@@ -0,0 +1,32 @@
[package]
name = "sqlfluffrs"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "sqlfluffrs"
crate-type = ["cdylib"]

[features]
unicode = []
python = ["unicode", "pyo3"]

[dependencies]
env_logger = "0.11.8"
fancy-regex = "0.16.2"
hashbrown = "0.15.5"
itertools = "0.14.0"
log = "0.4.28"
once_cell = "1.21.3"
pyo3 = { version = "0.26.0", optional = true, features = ["hashbrown", "extension-module", "uuid"] }
pyo3-log = { version = "0.13.0", optional = true }
regex = { version = "1.11.2", features = ["perf"] }
slotmap = "1.0.7"
uuid = { version = "1.18.1", features = ["v4"] }
serde = { version = "1.0.225", features = ["derive"] }
serde_json = "1.0.145"
bincode = "1.3.3"

[dev-dependencies]
env_logger = "0.11.6"
sqlfluffrs/LICENSE.md (new file, 21 lines)
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Alan Cruickshank

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
sqlfluffrs/README.md (new file, 20 lines)
@@ -0,0 +1,20 @@
# SQLFluff-rs

This package is an optional installation for [SQLFluff](https://github.com/sqlfluff/sqlfluff) and is **not** intended to be used as a standalone linting solution.

## Purpose

SQLFluff-rs serves as a Rust-based component that can be integrated with the main SQLFluff package. It is currently in development and should be considered experimental.

## Installation

This package is automatically handled when installing SQLFluff with the appropriate optional dependencies. Direct installation or standalone usage is not supported.

To install from pip:
```sh
pip install sqlfluff[rs]
```

## Development Status

This is a supplementary component and is not meant to replace or function independently of the main SQLFluff package. For SQL linting, please use the main [SQLFluff](https://github.com/sqlfluff/sqlfluff) package.
sqlfluffrs/py.typed (new file, empty)
sqlfluffrs/pyproject.toml (new file, 43 lines)
@@ -0,0 +1,43 @@
[build-system]
requires = ["maturin>=1.8,<2.0"]
build-backend = "maturin"

[project]
name = "sqlfluffrs"
readme = { file = "README.md", content-type = "text/markdown" }
license = { file = "LICENSE.md" }
description = "The SQL Linter for Humans"
requires-python = ">=3.9"
classifiers = [
    "Development Status :: 3 - Alpha",
    "Environment :: Console",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Operating System :: Unix",
    "Operating System :: POSIX",
    "Operating System :: MacOS",
    "Operating System :: Microsoft :: Windows",
    "Programming Language :: Rust",
    "Programming Language :: Python",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Programming Language :: Python :: Implementation :: CPython",
    "Programming Language :: Python :: Implementation :: PyPy",
    "Programming Language :: SQL",
    "Topic :: Utilities",
    "Topic :: Software Development :: Quality Assurance",
]
dynamic = ["version"]

[project.urls]
Homepage = "https://www.sqlfluff.com"
Documentation = "https://docs.sqlfluff.com"
Source = "https://github.com/sqlfluff/sqlfluff"
"Issue Tracker" = "https://github.com/sqlfluff/sqlfluff/issues"

[tool.maturin]
features = ["pyo3/extension-module", "python"]
sqlfluffrs/sqlfluffrs.pyi (new file, 143 lines)
@@ -0,0 +1,143 @@
from typing import TYPE_CHECKING, Any, List, Optional, Tuple, Union
from uuid import UUID

if TYPE_CHECKING:
    from sqlfluff.core.config import FluffConfig
    from sqlfluff.core.parser.lexer import StringLexer
    from sqlfluff.core.parser.segments import SourceFix
    from sqlfluff.core.templaters import TemplatedFile

SerializedObject = dict[str, Union[str, int, bool, list["SerializedObject"]]]
TupleSerialisedSegment = tuple[str, Union[str, tuple["TupleSerialisedSegment", ...]]]

class Slice: ...

class RsRawFileSlice:
    raw: str
    slice_type: str
    source_idx: int
    block_idx: int
    tag: Optional[str]

class RsTemplatedFileSlice:
    slice_type: str
    source_slice: Slice
    templated_slice: Slice

class RsTemplatedFile:
    source_str: str
    fname: str
    templated_str: str
    sliced_file: List[RsTemplatedFileSlice]
    raw_sliced: List[RsRawFileSlice]

class RsPositionMarker:
    source_slice: slice
    templated_slice: slice
    templated_file: RsTemplatedFile
    working_line_no: int
    working_line_pos: int

class RsToken:
    raw: str
    pos_marker: RsPositionMarker
    type: str
    uuid: Optional[int]
    source_fixes: Optional[list["SourceFix"]]

    def raw_trimmed(self) -> str: ...
    @property
    def is_templated(self) -> bool: ...
    @property
    def is_code(self) -> bool: ...
    @property
    def is_meta(self) -> bool: ...
    @property
    def source_str(self) -> str: ...
    @property
    def block_type(self) -> str: ...
    @property
    def block_uuid(self) -> Optional[UUID]: ...
    @property
    def cache_key(self) -> str: ...
    @property
    def trim_start(self) -> Optional[tuple[str]]: ...
    @property
    def trim_chars(self) -> Optional[tuple[str]]: ...
    @property
    def quoted_value(self) -> Optional[tuple[str, int | str]]: ...
    @property
    def escape_replacements(self) -> Optional[list[tuple[str, str]]]: ...
    def count_segments(self, raw_only: bool = False) -> int: ...
    def get_type(self) -> str: ...
    def recursive_crawl(
        self,
        seg_type: Tuple[str, ...],
        recurse_into: bool,
        no_recursive_seg_type: Optional[Union[str, List[str]]] = None,
        allow_self: bool = True,
    ) -> List["RsToken"]: ...
    def recursive_crawl_all(self, reverse: bool) -> List["RsToken"]: ...
    @property
    def segments(self) -> List["RsToken"]: ...
    def path_to(self, other: "RsToken") -> List[Any]: ...
    def get_start_loc(self) -> Tuple[int, int]: ...
    def get_end_loc(self) -> Tuple[int, int]: ...
    @property
    def raw_segments(self) -> List["RsToken"]: ...
    def copy(
        self,
        segments: Optional[List["RsToken"]] = None,
        parent: Optional[Any] = None,
        parent_idx: Optional[int] = None,
    ) -> "RsToken": ...
    def edit(
        self,
        raw: Optional[str] = None,
        source_fixes: Optional[List[Any]] = None,
    ) -> "RsToken": ...
    def to_tuple(
        self,
        code_only: Optional[bool] = None,
        show_raw: Optional[bool] = None,
        include_meta: Optional[bool] = None,
    ) -> TupleSerialisedSegment: ...
    def __repr__(self) -> str: ...
    @property
    def instance_types(self) -> List[str]: ...

class RsSQLLexerError:
    desc: str
    line_no: int
    line_pos: int
    ignore: bool
    warning: bool
    fatal: bool

    def __init__(
        self,
        msg: Optional[str] = None,
        pos: Optional[RsPositionMarker] = None,
        line_no: int = 0,
        line_pos: int = 0,
        ignore: bool = False,
        warning: bool = False,
        fatal: bool = False,
    ) -> None: ...
    def rule_code(self) -> str: ...
    def rule_name(self) -> str: ...
    def source_signature(self) -> Tuple[Tuple[str, int, int], str]: ...
    def to_dict(self) -> SerializedObject: ...
    def ignore_if_in(self, ignore_iterable: list[str]) -> None: ...
    def warning_if_in(self, ignore_iterable: list[str]) -> None: ...

class RsLexer:
    def __init__(
        self,
        config: Optional["FluffConfig"] = None,
        last_resort_lexer: Optional["StringLexer"] = None,
        dialect: Optional[str] = None,
    ): ...
    def _lex(
        self, lex_input: Union[str, "TemplatedFile"]
    ) -> Tuple[List[RsToken], List[Any]]: ...
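The stub above fixes the shape of the Python/Rust boundary: `RsLexer._lex` takes raw SQL or a `TemplatedFile` and returns tokens plus violations. For orientation, a pyo3 class backing a stub of this shape would typically be declared as below — this is a hypothetical sketch, not the binding code from this commit; the struct field and the placeholder `_lex` body are invented for illustration:

```rust
use pyo3::prelude::*;

// Hypothetical sketch of a pyo3-backed class matching the `RsLexer` stub
// shape above; the names mirror the stub, the behaviour is a placeholder.
#[pyclass]
struct RsLexer {
    dialect: Option<String>,
}

#[pymethods]
impl RsLexer {
    #[new]
    #[pyo3(signature = (dialect=None))]
    fn new(dialect: Option<String>) -> Self {
        Self { dialect }
    }

    // Mirrors the stub's `_lex(lex_input) -> (tokens, violations)` shape,
    // returning plain strings instead of real token objects.
    fn _lex(&self, lex_input: &str) -> PyResult<(Vec<String>, Vec<String>)> {
        let _ = &self.dialect; // a real lexer would select matchers by dialect
        Ok((vec![lex_input.to_string()], Vec::new()))
    }
}
```

pyo3's `#[pyclass]`/`#[pymethods]` attributes generate the glue code on the Rust side; the `.pyi` file then gives mypy static types for it.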
sqlfluffrs/src/config/fluffconfig.rs (new file, 44 lines)
@@ -0,0 +1,44 @@
#[derive(Clone)]
pub struct FluffConfig {
    pub dialect: Option<String>,
    pub template_blocks_indent: bool,
}

impl FluffConfig {
    pub fn new(dialect: Option<String>, template_blocks_indent: bool) -> Self {
        Self {
            dialect,
            template_blocks_indent,
        }
    }
}

#[cfg(feature = "python")]
pub mod python {
    use pyo3::{
        prelude::*,
        types::{PyDict, PyDictMethods},
    };

    use super::FluffConfig;

    #[derive(Clone)]
    pub struct PyFluffConfig(pub FluffConfig);

    impl<'py> FromPyObject<'py> for PyFluffConfig {
        fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
            let configs = ob.getattr("_configs")?;
            let configs_dict = configs.downcast::<PyDict>()?;
            let core = configs_dict.get_item("core").ok().flatten().unwrap();
            let core_dict = core.downcast::<PyDict>()?;
            let dialect = core_dict
                .get_item("dialect")
                .ok()
                .flatten()
                .and_then(|x| x.extract::<String>().ok());

            // println!("{:?}", dialect);
            Ok(Self(FluffConfig::new(dialect, true)))
        }
    }
}
sqlfluffrs/src/config/mod.rs (new file, 1 line)
@@ -0,0 +1 @@
pub mod fluffconfig;
sqlfluffrs/src/dialect/ansi/matcher.rs (new file, 895 lines)
@@ -0,0 +1,895 @@
/* This is a generated file! */
use once_cell::sync::Lazy;
use crate::matcher::{LexMatcher, extract_nested_block_comment};
use crate::token::Token;
use crate::token::config::TokenConfig;
use crate::regex::RegexModeGroup;
use crate::dialect::Dialect;
use hashbrown::HashSet;

pub static ANSI_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
    "CASE".to_string(),
    "CROSS".to_string(),
    "FULL".to_string(),
    "IGNORE".to_string(),
    "INNER".to_string(),
    "INTERVAL".to_string(),
    "JOIN".to_string(),
    "LEFT".to_string(),
    "NATURAL".to_string(),
    "NOT".to_string(),
    "NULL".to_string(),
    "ON".to_string(),
    "ORDER".to_string(),
    "OUTER".to_string(),
    "PARTITION".to_string(),
    "RESPECT".to_string(),
    "RIGHT".to_string(),
    "ROWS".to_string(),
    "SELECT".to_string(),
    "SET".to_string(),
    "UNION".to_string(),
    "USING".to_string(),
]});

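This file also imports `hashbrown::HashSet`, which suggests the keyword list above gets folded into a set for O(1) membership checks when classifying word tokens. A minimal sketch of that pattern, with hypothetical helper names rather than the generated code:

```rust
use hashbrown::HashSet;
use once_cell::sync::Lazy;

// Stand-in for the generated ANSI_KEYWORDS static above.
static KEYWORDS: &[&str] = &["CASE", "JOIN", "SELECT", "UNION"];

// Build the lookup set once, on first use.
static KEYWORD_SET: Lazy<HashSet<&'static str>> =
    Lazy::new(|| KEYWORDS.iter().copied().collect());

/// Case-insensitive keyword check: the keywords are stored uppercase.
fn is_keyword(word: &str) -> bool {
    KEYWORD_SET.contains(word.to_uppercase().as_str())
}

fn main() {
    assert!(is_keyword("select"));
    assert!(!is_keyword("my_column"));
}
```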
pub static ANSI_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![

    LexMatcher::regex_lexer(
        Dialect::Ansi,
        "whitespace",
        r#"[^\S\r\n]+"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::whitespace_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Ansi,
        "inline_comment",
        r#"(--|#)[^\n]*"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        Some(vec![String::from("--"), String::from("#")]),
        None,
        None,
        None,
        None,
        None,
        |input| input.starts_with(['#','-','/']),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Ansi,
        "block_comment",
        r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        Some(Box::new(
            LexMatcher::regex_subdivider(
                Dialect::Ansi,
                "newline",
                r#"\r\n|\n"#,
                |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
                 quoted_value, escape_replacement, casefold| {
                    Token::newline_token(raw, pos_marker, TokenConfig {
                        class_types, instance_types, trim_start, trim_chars,
                        quoted_value, escape_replacement, casefold,
                    })
                },
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                |_| true,
                None,
            ))),
        Some(Box::new(
            LexMatcher::regex_subdivider(
                Dialect::Ansi,
                "whitespace",
                r#"[^\S\r\n]+"#,
                |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
                 quoted_value, escape_replacement, casefold| {
                    Token::whitespace_token(raw, pos_marker, TokenConfig {
                        class_types, instance_types, trim_start, trim_chars,
                        quoted_value, escape_replacement, casefold,
                    })
                },
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                |_| true,
                None,
            ))),
        None,
        None,
        None,
        None,
        None,
        Some(extract_nested_block_comment),
        |input| input.starts_with("/"),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Ansi,
        "single_quote",
        r#"'([^'\\]|\\.|'')*'"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        Some((r#"'((?:[^'\\]|\\.|'')*)'"#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"\\'|''"#.to_string(), r#"'"#.to_string())),
        None,
        None,
        |input| match input.as_bytes() {
            [b'\'', ..] => true, // Single quote case
            [b'R' | b'r', b'\'', ..] => true, // r' or R'
            [b'B' | b'b', b'\'', ..] => true, // b' or B'
            [b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
|
||||
[b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
|
||||
_ => false,
|
||||
},
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::regex_lexer(
|
||||
Dialect::Ansi,
|
||||
"double_quote",
|
||||
r#""(""|[^"\\]|\\.)*""#,
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
Some((r#""((?:[^"\\]|\\.)*)""#.to_string(), RegexModeGroup::Index(1))),
|
||||
Some((r#"\\"|"""#.to_string(), r#"""#.to_string())),
|
||||
None,
|
||||
None,
|
||||
|input| match input.as_bytes() {
|
||||
[b'"', ..] => true, // Just a double quote
|
||||
[b'R' | b'r', b'"', ..] => true, // r" or R"
|
||||
[b'B' | b'b', b'"', ..] => true, // b" or B"
|
||||
[b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
|
||||
[b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
|
||||
_ => false,
|
||||
},
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::regex_lexer(
|
||||
Dialect::Ansi,
|
||||
"back_quote",
|
||||
r#"`(?:[^`\\]|\\.)*`"#,
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
Some((r#"`((?:[^`\\]|\\.)*)`"#.to_string(), RegexModeGroup::Index(1))),
|
||||
Some((r#"\\`"#.to_string(), r#"`"#.to_string())),
|
||||
None,
|
||||
None,
|
||||
|_| true,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::regex_lexer(
|
||||
Dialect::Ansi,
|
||||
"dollar_quote",
|
||||
r#"\$(\w*)\$(.*?)\$\1\$"#,
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
|input| input.starts_with("$"),
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::regex_lexer(
|
||||
Dialect::Ansi,
|
||||
"numeric_literal",
|
||||
r#"(?>\d+\.\d+|\d+\.(?![\.\w])|\.\d+|\d+)(\.?[eE][+-]?\d+)?((?<=\.)|(?=\b))"#,
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::literal_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
|input| input.starts_with(['x','X','.','0','1','2','3','4','5','6','7','8','9']),
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::regex_lexer(
|
||||
Dialect::Ansi,
|
||||
"obevo_annotation",
|
||||
r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::comment_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
|_| true,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"glob_operator",
|
||||
"~~~",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::regex_lexer(
|
||||
Dialect::Ansi,
|
||||
"like_operator",
|
||||
r#"!?~~?\*?"#,
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::comparison_operator_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
|_| true,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::regex_lexer(
|
||||
Dialect::Ansi,
|
||||
"newline",
|
||||
r#"\r\n|\n"#,
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::newline_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
|_| true,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"casting_operator",
|
||||
"::",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"equals",
|
||||
"=",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"greater_than",
|
||||
">",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"less_than",
|
||||
"<",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"not",
|
||||
"!",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"dot",
|
||||
".",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"comma",
|
||||
",",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"plus",
|
||||
"+",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"minus",
|
||||
"-",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"divide",
|
||||
"/",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"percent",
|
||||
"%",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"question",
|
||||
"?",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"ampersand",
|
||||
"&",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"vertical_bar",
|
||||
"|",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"caret",
|
||||
"^",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"star",
|
||||
"*",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"start_bracket",
|
||||
"(",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"end_bracket",
|
||||
")",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"start_square_bracket",
|
||||
"[",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"end_square_bracket",
|
||||
"]",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"start_curly_bracket",
|
||||
"{",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"end_curly_bracket",
|
||||
"}",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"colon",
|
||||
":",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Ansi,
|
||||
"semicolon",
|
||||
";",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::regex_lexer(
|
||||
Dialect::Ansi,
|
||||
"word",
|
||||
r#"[0-9a-zA-Z_]+"#,
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::word_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
|_| true,
|
||||
None,
|
||||
),
|
||||
]});
|
||||
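Each entry above bundles a dialect, a matcher name, a pattern, a token constructor, option slots (subdividers, trim characters, quoted-value extraction, escape replacement), a cheap first-character precondition, and a final fallback slot. The precondition is what keeps the table fast: the expensive pattern only runs when the first byte could plausibly match. A self-contained sketch of that dispatch idea, using toy types rather than the crate's real `LexMatcher` API:

// Toy types, not the crate's API: each entry pairs an expensive matcher with
// a cheap first-byte precondition, so most matchers are skipped without ever
// running a regex.
struct ToyMatcher {
    name: &'static str,
    precondition: fn(&str) -> bool,
    matches: fn(&str) -> Option<usize>, // length of match at start, if any
}

fn lex_one(matchers: &[ToyMatcher], input: &str) -> Option<(&'static str, usize)> {
    matchers.iter().find_map(|m| {
        if !(m.precondition)(input) {
            return None; // cheap reject: the real pattern never runs
        }
        (m.matches)(input).map(|len| (m.name, len))
    })
}

fn main() {
    let matchers = [
        ToyMatcher {
            name: "inline_comment",
            precondition: |s| s.starts_with(['#', '-']),
            matches: |s| s.starts_with("--").then(|| s.find('\n').unwrap_or(s.len())),
        },
        ToyMatcher {
            name: "word",
            precondition: |_| true,
            matches: |s| match s.find(|c: char| !c.is_alphanumeric() && c != '_') {
                Some(0) => None,
                Some(n) => Some(n),
                None if s.is_empty() => None,
                None => Some(s.len()),
            },
        },
    ];
    assert_eq!(lex_one(&matchers, "-- note"), Some(("inline_comment", 7)));
    assert_eq!(lex_one(&matchers, "SELECT 1"), Some(("word", 6)));
}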
5  sqlfluffrs/src/dialect/ansi/mod.rs  Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
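Both generated items in the matcher file are `once_cell::sync::Lazy` statics, so the keyword list and matcher table are built on first access and shared for the life of the process, while the `#[allow(clippy::needless_raw_string_hashes)]` here silences the lint that would otherwise fire on generated patterns such as `r#"\r\n|\n"#` that do not strictly need the `#` guards. A minimal sketch of the `Lazy` behaviour (std's `OnceLock` offers the same shape):

use once_cell::sync::Lazy;

// Built exactly once, on first dereference; later reads reuse the value.
static MATCHER_NAMES: Lazy<Vec<&'static str>> = Lazy::new(|| {
    println!("building table");
    vec!["whitespace", "inline_comment", "word"]
});

fn main() {
    assert_eq!(MATCHER_NAMES.len(), 3); // prints "building table"
    assert_eq!(MATCHER_NAMES[2], "word"); // silent: no rebuild
}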
1028  sqlfluffrs/src/dialect/athena/matcher.rs  Normal file
File diff suppressed because it is too large
5  sqlfluffrs/src/dialect/athena/mod.rs  Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1083  sqlfluffrs/src/dialect/bigquery/matcher.rs  Normal file
File diff suppressed because it is too large
5  sqlfluffrs/src/dialect/bigquery/mod.rs  Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
916  sqlfluffrs/src/dialect/clickhouse/matcher.rs  Normal file
@@ -0,0 +1,916 @@
/* This is a generated file! */
use once_cell::sync::Lazy;
use crate::matcher::{LexMatcher, extract_nested_block_comment};
use crate::token::Token;
use crate::token::config::TokenConfig;
use crate::regex::RegexModeGroup;
use crate::dialect::Dialect;
use hashbrown::HashSet;

pub static CLICKHOUSE_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
    "CASE".to_string(), "CROSS".to_string(), "FULL".to_string(), "IGNORE".to_string(),
    "INNER".to_string(), "INTERVAL".to_string(), "JOIN".to_string(), "LEFT".to_string(),
    "NATURAL".to_string(), "NOT".to_string(), "NULL".to_string(), "ON".to_string(),
    "ORDER".to_string(), "OUTER".to_string(), "PARTITION".to_string(), "RESPECT".to_string(),
    "RIGHT".to_string(), "ROWS".to_string(), "SELECT".to_string(), "SET".to_string(),
    "UNION".to_string(), "USING".to_string(),
]});

pub static CLICKHOUSE_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![

    LexMatcher::regex_lexer(
        Dialect::Clickhouse, "whitespace", r#"[^\S\r\n]+"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::whitespace_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Clickhouse, "inline_comment", r#"(--|#)[^\n]*"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None,
        Some(vec![String::from("--"), String::from("#")]),
        None, None, None, None, None,
        |input| input.starts_with(['#','-','/']),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Clickhouse, "block_comment", r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        Some(Box::new(LexMatcher::regex_subdivider(
            Dialect::Clickhouse, "newline", r#"\r\n|\n"#,
            |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
             quoted_value, escape_replacement, casefold| {
                Token::newline_token(raw, pos_marker, TokenConfig {
                    class_types, instance_types, trim_start, trim_chars,
                    quoted_value, escape_replacement, casefold,
                })
            },
            None, None, None, None, None, None, None, None,
            |_| true,
            None,
        ))),
        Some(Box::new(LexMatcher::regex_subdivider(
            Dialect::Clickhouse, "whitespace", r#"[^\S\r\n]+"#,
            |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
             quoted_value, escape_replacement, casefold| {
                Token::whitespace_token(raw, pos_marker, TokenConfig {
                    class_types, instance_types, trim_start, trim_chars,
                    quoted_value, escape_replacement, casefold,
                })
            },
            None, None, None, None, None, None, None, None,
            |_| true,
            None,
        ))),
        None, None, None, None, None,
        Some(extract_nested_block_comment),
        |input| input.starts_with("/"),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Clickhouse, "single_quote", r#"'([^'\\]|\\.|'')*'"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#"'((?:[^'\\]|\\.|'')*)'"#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"\\'|''"#.to_string(), r#"'"#.to_string())),
        None, None,
        |input| match input.as_bytes() {
            [b'\'', ..] => true, // Single quote case
            [b'R' | b'r', b'\'', ..] => true, // r' or R'
            [b'B' | b'b', b'\'', ..] => true, // b' or B'
            [b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
            [b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
            _ => false,
        },
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Clickhouse, "double_quote", r#""([^"\\]|""|\\.)*""#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#""((?:[^"\\]|""|\\.)*)""#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"(""|\\")"#.to_string(), r#"""#.to_string())),
        None, None,
        |input| match input.as_bytes() {
            [b'"', ..] => true, // Just a double quote
            [b'R' | b'r', b'"', ..] => true, // r" or R"
            [b'B' | b'b', b'"', ..] => true, // b" or B"
            [b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
            [b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
            _ => false,
        },
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Clickhouse, "back_quote", r#"`(?:[^`\\]|``|\\.)*`"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#"`((?:[^`\\]|``|\\.)*)`"#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"(``|\\`)"#.to_string(), r#"`"#.to_string())),
        None, None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Clickhouse, "dollar_quote", r#"\$(\w*)\$(.*?)\$\1\$"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
        None, None, None,
        |input| input.starts_with("$"),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Clickhouse, "numeric_literal",
        r#"(?>\d+\.\d+|\d+\.(?![\.\w])|\.\d+|\d+)(\.?[eE][+-]?\d+)?((?<=\.)|(?=\b))"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::literal_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |input| input.starts_with(['x','X','.','0','1','2','3','4','5','6','7','8','9']),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Clickhouse, "obevo_annotation", r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "glob_operator", "~~~",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comparison_operator_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Clickhouse, "like_operator", r#"!?~~?\*?"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comparison_operator_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "lambda", "->",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::symbol_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Clickhouse, "newline", r#"\r\n|\n"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::newline_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "casting_operator", "::",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "equals", "=",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "greater_than", ">",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "less_than", "<",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "not", "!",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "dot", ".",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "comma", ",",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "plus", "+",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "minus", "-",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "divide", "/",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "percent", "%",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "question", "?",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "ampersand", "&",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "vertical_bar", "|",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "caret", "^",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "star", "*",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "start_bracket", "(",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "end_bracket", ")",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "start_square_bracket", "[",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "end_square_bracket", "]",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "start_curly_bracket", "{",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "end_curly_bracket", "}",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "colon", ":",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Clickhouse, "semicolon", ";",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Clickhouse, "word", r#"[0-9a-zA-Z_]+"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::word_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),
]});
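The `quoted_value` / `escape_replacement` pairs above describe two steps: a capture pattern plus a `RegexModeGroup` index picks out the quoted body, and a replacement pattern undoes the escaping (`\\'` or `''` collapsing to `'` here; Clickhouse additionally accepts doubled backticks). Note also that entries like `dollar_quote` use a backreference (`\$\1\$`), which the plain `regex` crate cannot express, presumably one reason the file goes through the crate-local `crate::regex` module. A self-contained sketch of the unescape step, using the `regex` crate for the capture (the engine choice here is an assumption, not the crate's):

use regex::Regex;

fn main() {
    // Capture pattern + group index, as in the single_quote entries above.
    let quoted = Regex::new(r"'((?:[^'\\]|\\.|'')*)'").unwrap();
    let raw = r"'it''s \'fine\''";
    let inner = quoted
        .captures(raw)
        .and_then(|c| c.get(1)) // RegexModeGroup::Index(1)
        .map_or("", |m| m.as_str());
    // Replacement step: both escape spellings collapse to a single quote.
    let unescaped = inner.replace("''", "'").replace(r"\'", "'");
    assert_eq!(unescaped, r"it's 'fine'");
}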
5  sqlfluffrs/src/dialect/clickhouse/mod.rs  Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1228  sqlfluffrs/src/dialect/databricks/matcher.rs  Normal file
File diff suppressed because it is too large
5  sqlfluffrs/src/dialect/databricks/mod.rs  Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
915  sqlfluffrs/src/dialect/db2/matcher.rs  Normal file
@@ -0,0 +1,915 @@
/* This is a generated file! */
use once_cell::sync::Lazy;
use crate::matcher::{LexMatcher, extract_nested_block_comment};
use crate::token::Token;
use crate::token::config::TokenConfig;
use crate::regex::RegexModeGroup;
use crate::dialect::Dialect;
use hashbrown::HashSet;

pub static DB2_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
    "CASE".to_string(), "CROSS".to_string(), "FULL".to_string(), "IGNORE".to_string(),
    "INNER".to_string(), "INTERVAL".to_string(), "JOIN".to_string(), "LEFT".to_string(),
    "NOT".to_string(), "NULL".to_string(), "ON".to_string(), "ORDER".to_string(),
    "OUTER".to_string(), "PARTITION".to_string(), "RESPECT".to_string(), "RIGHT".to_string(),
    "ROWS".to_string(), "SELECT".to_string(), "SET".to_string(), "UNION".to_string(),
    "USING".to_string(),
]});

pub static DB2_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![

    LexMatcher::regex_lexer(
        Dialect::Db2, "whitespace", r#"[^\S\r\n]+"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::whitespace_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Db2, "inline_comment", r#"(--)[^\n]*"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None,
        Some(vec![String::from("-"), String::from("-")]),
        None, None, None, None, None,
        |input| input.starts_with(['#','-','/']),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Db2, "block_comment", r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        Some(Box::new(LexMatcher::regex_subdivider(
            Dialect::Db2, "newline", r#"\r\n|\n"#,
            |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
             quoted_value, escape_replacement, casefold| {
                Token::newline_token(raw, pos_marker, TokenConfig {
                    class_types, instance_types, trim_start, trim_chars,
                    quoted_value, escape_replacement, casefold,
                })
            },
            None, None, None, None, None, None, None, None,
            |_| true,
            None,
        ))),
        Some(Box::new(LexMatcher::regex_subdivider(
            Dialect::Db2, "whitespace", r#"[^\S\r\n]+"#,
            |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
             quoted_value, escape_replacement, casefold| {
                Token::whitespace_token(raw, pos_marker, TokenConfig {
                    class_types, instance_types, trim_start, trim_chars,
                    quoted_value, escape_replacement, casefold,
                })
            },
            None, None, None, None, None, None, None, None,
            |_| true,
            None,
        ))),
        None, None, None, None, None,
        Some(extract_nested_block_comment),
        |input| input.starts_with("/"),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Db2, "single_quote", r#"'((?:[^']|'')*)'"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#"'((?:[^']|'')*)'"#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"''"#.to_string(), r#"'"#.to_string())),
        None, None,
        |input| match input.as_bytes() {
            [b'\'', ..] => true, // Single quote case
            [b'R' | b'r', b'\'', ..] => true, // r' or R'
            [b'B' | b'b', b'\'', ..] => true, // b' or B'
            [b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
            [b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
            _ => false,
        },
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Db2, "double_quote", r#""((?:[^"]|"")*)""#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#""((?:[^"]|"")*)""#.to_string(), RegexModeGroup::Index(1))),
        Some((r#""""#.to_string(), r#"""#.to_string())),
        None, None,
        |input| match input.as_bytes() {
            [b'"', ..] => true, // Just a double quote
            [b'R' | b'r', b'"', ..] => true, // r" or R"
            [b'B' | b'b', b'"', ..] => true, // b" or B"
            [b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
            [b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
            _ => false,
        },
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Db2, "back_quote", r#"`(?:[^`\\]|\\.)*`"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#"`((?:[^`\\]|\\.)*)`"#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"\\`"#.to_string(), r#"`"#.to_string())),
        None, None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Db2, "dollar_quote", r#"\$(\w*)\$(.*?)\$\1\$"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
        None, None, None,
        |input| input.starts_with("$"),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Db2, "numeric_literal",
        r#"(?>\d+\.\d+|\d+\.(?![\.\w])|\.\d+|\d+)(\.?[eE][+-]?\d+)?((?<=\.)|(?=\b))"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::literal_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |input| input.starts_with(['x','X','.','0','1','2','3','4','5','6','7','8','9']),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Db2, "obevo_annotation", r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Db2, "glob_operator", "~~~",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comparison_operator_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Db2, "like_operator", r#"!?~~?\*?"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comparison_operator_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Db2, "newline", r#"\r\n|\n"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::newline_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Db2, "casting_operator", "::",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Db2, "right_arrow", "=>",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"equals",
|
||||
"=",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"greater_than",
|
||||
">",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"less_than",
|
||||
"<",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"not",
|
||||
"!",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"dot",
|
||||
".",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"comma",
|
||||
",",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"plus",
|
||||
"+",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"minus",
|
||||
"-",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"divide",
|
||||
"/",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"percent",
|
||||
"%",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"question",
|
||||
"?",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"ampersand",
|
||||
"&",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"vertical_bar",
|
||||
"|",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"caret",
|
||||
"^",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"star",
|
||||
"*",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"start_bracket",
|
||||
"(",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"end_bracket",
|
||||
")",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"start_square_bracket",
|
||||
"[",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"end_square_bracket",
|
||||
"]",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"start_curly_bracket",
|
||||
"{",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"end_curly_bracket",
|
||||
"}",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"colon",
|
||||
":",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::string_lexer(
|
||||
Dialect::Db2,
|
||||
"semicolon",
|
||||
";",
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::code_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
|
||||
LexMatcher::regex_lexer(
|
||||
Dialect::Db2,
|
||||
"word",
|
||||
r#"[0-9a-zA-Z_#]+"#,
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::word_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
|_| true,
|
||||
None,
|
||||
),
|
||||
]});
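A note on the `quoted_value` / `escape_replacement` pairs used by the quote matchers above: the first pattern selects a capture group as the token's inner value, and the second rewrites the dialect's escape sequence inside it. The following standalone sketch shows that two-step mapping for the Db2 `single_quote` matcher. It is an illustration, not part of the generated file; the `fancy_regex` crate is an assumption here (the generated patterns use atomic groups, lookbehind and backreferences, which the plain `regex` crate does not support), and `RegexModeGroup` / `TokenConfig` are wrapper types introduced in this PR.

use fancy_regex::Regex;

fn main() {
    let raw = "'It''s'";
    // Group 1 of the quoted_value pattern strips the surrounding quotes...
    let re = Regex::new(r"'((?:[^']|'')*)'").unwrap();
    let inner = re.captures(raw).unwrap().unwrap().get(1).unwrap().as_str();
    // ...and the escape_replacement pair ('' -> ') collapses the escape.
    let value = inner.replace("''", "'");
    assert_eq!(value, "It's");
}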
5
sqlfluffrs/src/dialect/db2/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1229
sqlfluffrs/src/dialect/doris/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/doris/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1339
sqlfluffrs/src/dialect/duckdb/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/duckdb/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1588
sqlfluffrs/src/dialect/exasol/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/exasol/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
996
sqlfluffrs/src/dialect/flink/matcher.rs
Normal file
@@ -0,0 +1,996 @@
/* This is a generated file! */
use once_cell::sync::Lazy;
use crate::matcher::{LexMatcher, extract_nested_block_comment};
use crate::token::Token;
use crate::token::config::TokenConfig;
use crate::regex::RegexModeGroup;
use crate::dialect::Dialect;
use hashbrown::HashSet;

pub static FLINK_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
    "ALL".to_string(), "AND".to_string(), "ANY".to_string(), "AS".to_string(),
    "AUTHORIZATION".to_string(), "BETWEEN".to_string(), "BIGINT".to_string(), "BINARY".to_string(),
    "BOOLEAN".to_string(), "BOTH".to_string(), "BY".to_string(), "CASE".to_string(),
    "CAST".to_string(), "CHAR".to_string(), "CHARACTER".to_string(), "CHECK".to_string(),
    "COLLATE".to_string(), "COLUMN".to_string(), "CONSTRAINT".to_string(), "CREATE".to_string(),
    "CROSS".to_string(), "CURRENT_DATE".to_string(), "CURRENT_TIME".to_string(), "CURRENT_TIMESTAMP".to_string(),
    "CURRENT_USER".to_string(), "CURSOR".to_string(), "DATE".to_string(), "DAY".to_string(),
    "DECIMAL".to_string(), "DECLARE".to_string(), "DELETE".to_string(), "DESC".to_string(),
    "DISTINCT".to_string(), "DOUBLE".to_string(), "DROP".to_string(), "ELSE".to_string(),
    "END".to_string(), "ESCAPE".to_string(), "EXCEPT".to_string(), "EXISTS".to_string(),
    "EXTRACT".to_string(), "FALSE".to_string(), "FETCH".to_string(), "FILTER".to_string(),
    "FLOAT".to_string(), "FOR".to_string(), "FOREIGN".to_string(), "FROM".to_string(),
    "FULL".to_string(), "FUNCTION".to_string(), "GRANT".to_string(), "GROUP".to_string(),
    "HAVING".to_string(), "HOUR".to_string(), "IF".to_string(), "IGNORE".to_string(),
    "IN".to_string(), "INNER".to_string(), "INSERT".to_string(), "INT".to_string(),
    "INTEGER".to_string(), "INTERSECT".to_string(), "INTERVAL".to_string(), "INTO".to_string(),
    "IS".to_string(), "JOIN".to_string(), "LEADING".to_string(), "LEFT".to_string(),
    "LIKE".to_string(), "LIMIT".to_string(), "LOCAL".to_string(), "MINUTE".to_string(),
    "MONTH".to_string(), "NATURAL".to_string(), "NOT".to_string(), "NULL".to_string(),
    "NUMERIC".to_string(), "OF".to_string(), "ON".to_string(), "ONLY".to_string(),
    "OR".to_string(), "ORDER".to_string(), "OUTER".to_string(), "OVERLAPS".to_string(),
    "OVERLAY".to_string(), "PARTITION".to_string(), "POSITION".to_string(), "PRIMARY".to_string(),
    "REAL".to_string(), "REFERENCES".to_string(), "RESPECT".to_string(), "RIGHT".to_string(),
    "ROW".to_string(), "ROWS".to_string(), "SECOND".to_string(), "SELECT".to_string(),
    "SESSION_USER".to_string(), "SET".to_string(), "SMALLINT".to_string(), "SOME".to_string(),
    "SUBSTRING".to_string(), "TABLE".to_string(), "THEN".to_string(), "TIME".to_string(),
    "TIMESTAMP".to_string(), "TINYINT".to_string(), "TO".to_string(), "TRAILING".to_string(),
    "TRUE".to_string(), "UNION".to_string(), "UNIQUE".to_string(), "UNKNOWN".to_string(),
    "UPDATE".to_string(), "USER".to_string(), "USING".to_string(), "VALUES".to_string(),
    "VARCHAR".to_string(), "WHEN".to_string(), "WHERE".to_string(), "WITH".to_string(),
    "YEAR".to_string(),
]});

pub static FLINK_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![

    LexMatcher::regex_lexer(
        Dialect::Flink, "whitespace", r#"[^\S\r\n]+"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::whitespace_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Flink, "inline_comment", r#"(--)[^\n]*"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None,
        Some(vec![String::from("-"), String::from("-")]),
        None, None, None, None, None,
        |input| input.starts_with(['#','-','/']),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Flink, "block_comment", r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        Some(Box::new(
            LexMatcher::regex_subdivider(
                Dialect::Flink, "newline", r#"\r\n|\n"#,
                |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
                 quoted_value, escape_replacement, casefold| {
                    Token::newline_token(raw, pos_marker, TokenConfig {
                        class_types, instance_types, trim_start, trim_chars,
                        quoted_value, escape_replacement, casefold,
                    })
                },
                None, None, None, None, None, None, None, None,
                |_| true,
                None,
            ))),
        Some(Box::new(
            LexMatcher::regex_subdivider(
                Dialect::Flink, "whitespace", r#"[^\S\r\n]+"#,
                |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
                 quoted_value, escape_replacement, casefold| {
                    Token::whitespace_token(raw, pos_marker, TokenConfig {
                        class_types, instance_types, trim_start, trim_chars,
                        quoted_value, escape_replacement, casefold,
                    })
                },
                None, None, None, None, None, None, None, None,
                |_| true,
                None,
            ))),
        None, None, None, None, None,
        Some(extract_nested_block_comment),
        |input| input.starts_with("/"),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Flink, "single_quote", r#"'([^'\\]|\\.)*'"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#"'((?:[^'\\]|\\.)*)'"#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"''"#.to_string(), r#"'"#.to_string())),
        None, None,
        |input| match input.as_bytes() {
            [b'\'', ..] => true, // Single quote case
            [b'R' | b'r', b'\'', ..] => true, // r' or R'
            [b'B' | b'b', b'\'', ..] => true, // b' or B'
            [b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
            [b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
            _ => false,
        },
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Flink, "double_quote", r#""(""|[^"\\]|\\.)*""#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#""((?:[^"\\]|\\.)*)""#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"\\"|"""#.to_string(), r#"""#.to_string())),
        None, None,
        |input| match input.as_bytes() {
            [b'"', ..] => true, // Just a double quote
            [b'R' | b'r', b'"', ..] => true, // r" or R"
            [b'B' | b'b', b'"', ..] => true, // b" or B"
            [b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
            [b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
            _ => false,
        },
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Flink, "back_quote", r#"`([^`]|``)*`"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#"`((?:[^`]|``)*)`"#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"``"#.to_string(), r#"`"#.to_string())),
        None, None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Flink, "dollar_quote", r#"\$(\w*)\$(.*?)\$\1\$"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
        None, None, None,
        |input| input.starts_with("$"),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Flink, "numeric_literal",
        r#"(?>(?>\d+\.\d+|\d+\.|\.\d+)([eE][+-]?\d+)?([dDfF]|BD|bd)?|\d+[eE][+-]?\d+([dDfF]|BD|bd)?|\d+([dDfFlLsSyY]|BD|bd)?)((?<=\.)|(?=\b))"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |input| input.starts_with(['x','X','.','0','1','2','3','4','5','6','7','8','9']),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Flink, "obevo_annotation", r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "glob_operator", "~~~",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comparison_operator_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Flink, "like_operator", r#"!?~~?\*?"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comparison_operator_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Flink, "newline", r#"\r\n|\n"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::newline_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "casting_operator", "::",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Flink, "equals", r#"==|="#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "greater_than", ">",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "less_than", "<",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "not", "!",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "dot", ".",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "comma", ",",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "plus", "+",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "minus", "-",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "divide", "/",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "percent", "%",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "question", "?",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "ampersand", "&",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "vertical_bar", "|",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "caret", "^",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "star", "*",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "start_bracket", "(",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "end_bracket", ")",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "start_square_bracket", "[",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "end_square_bracket", "]",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "start_curly_bracket", "{",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "end_curly_bracket", "}",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "colon", ":",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Flink, "semicolon", ";",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Flink, "word", r#"[0-9a-zA-Z_]+"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::word_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),
]});
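The `dollar_quote` matcher above is the one pattern in this table that needs a backtracking regex engine: its closing delimiter is a `\1` backreference to the opening tag, and group 2 (selected by `RegexModeGroup::Index(2)`) is the quoted body. A minimal standalone sketch of that behaviour follows; it is an illustration rather than part of the generated file, and the `fancy_regex` crate is an assumption here (the plain `regex` crate rejects backreferences, atomic groups and lookbehind).

use fancy_regex::Regex;

fn main() {
    let re = Regex::new(r"\$(\w*)\$(.*?)\$\1\$").unwrap();
    // Group 1 is the (possibly empty) tag; group 2 is the quoted body.
    let caps = re.captures("$fn$SELECT 1$fn$").unwrap().unwrap();
    assert_eq!(caps.get(1).unwrap().as_str(), "fn");
    assert_eq!(caps.get(2).unwrap().as_str(), "SELECT 1");
}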
5
sqlfluffrs/src/dialect/flink/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1316
sqlfluffrs/src/dialect/greenplum/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/greenplum/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1015
sqlfluffrs/src/dialect/hive/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/hive/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1306
sqlfluffrs/src/dialect/impala/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/impala/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1300
sqlfluffrs/src/dialect/mariadb/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/mariadb/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1271
sqlfluffrs/src/dialect/materialize/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/materialize/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
200
sqlfluffrs/src/dialect/mod.rs
Normal file
@@ -0,0 +1,200 @@
/* This is a generated file! */

/* dialect mods */
pub mod ansi;
use crate::dialect::ansi::matcher::{ANSI_KEYWORDS, ANSI_LEXERS};
pub mod athena;
use crate::dialect::athena::matcher::{ATHENA_KEYWORDS, ATHENA_LEXERS};
pub mod bigquery;
use crate::dialect::bigquery::matcher::{BIGQUERY_KEYWORDS, BIGQUERY_LEXERS};
pub mod clickhouse;
use crate::dialect::clickhouse::matcher::{CLICKHOUSE_KEYWORDS, CLICKHOUSE_LEXERS};
pub mod databricks;
use crate::dialect::databricks::matcher::{DATABRICKS_KEYWORDS, DATABRICKS_LEXERS};
pub mod db2;
use crate::dialect::db2::matcher::{DB2_KEYWORDS, DB2_LEXERS};
pub mod doris;
use crate::dialect::doris::matcher::{DORIS_KEYWORDS, DORIS_LEXERS};
pub mod duckdb;
use crate::dialect::duckdb::matcher::{DUCKDB_KEYWORDS, DUCKDB_LEXERS};
pub mod exasol;
use crate::dialect::exasol::matcher::{EXASOL_KEYWORDS, EXASOL_LEXERS};
pub mod flink;
use crate::dialect::flink::matcher::{FLINK_KEYWORDS, FLINK_LEXERS};
pub mod greenplum;
use crate::dialect::greenplum::matcher::{GREENPLUM_KEYWORDS, GREENPLUM_LEXERS};
pub mod hive;
use crate::dialect::hive::matcher::{HIVE_KEYWORDS, HIVE_LEXERS};
pub mod impala;
use crate::dialect::impala::matcher::{IMPALA_KEYWORDS, IMPALA_LEXERS};
pub mod mariadb;
use crate::dialect::mariadb::matcher::{MARIADB_KEYWORDS, MARIADB_LEXERS};
pub mod materialize;
use crate::dialect::materialize::matcher::{MATERIALIZE_KEYWORDS, MATERIALIZE_LEXERS};
pub mod mysql;
use crate::dialect::mysql::matcher::{MYSQL_KEYWORDS, MYSQL_LEXERS};
pub mod oracle;
use crate::dialect::oracle::matcher::{ORACLE_KEYWORDS, ORACLE_LEXERS};
pub mod postgres;
use crate::dialect::postgres::matcher::{POSTGRES_KEYWORDS, POSTGRES_LEXERS};
pub mod redshift;
use crate::dialect::redshift::matcher::{REDSHIFT_KEYWORDS, REDSHIFT_LEXERS};
pub mod snowflake;
use crate::dialect::snowflake::matcher::{SNOWFLAKE_KEYWORDS, SNOWFLAKE_LEXERS};
pub mod soql;
use crate::dialect::soql::matcher::{SOQL_KEYWORDS, SOQL_LEXERS};
pub mod sparksql;
use crate::dialect::sparksql::matcher::{SPARKSQL_KEYWORDS, SPARKSQL_LEXERS};
pub mod sqlite;
use crate::dialect::sqlite::matcher::{SQLITE_KEYWORDS, SQLITE_LEXERS};
pub mod starrocks;
use crate::dialect::starrocks::matcher::{STARROCKS_KEYWORDS, STARROCKS_LEXERS};
pub mod teradata;
use crate::dialect::teradata::matcher::{TERADATA_KEYWORDS, TERADATA_LEXERS};
pub mod trino;
use crate::dialect::trino::matcher::{TRINO_KEYWORDS, TRINO_LEXERS};
pub mod tsql;
use crate::dialect::tsql::matcher::{TSQL_KEYWORDS, TSQL_LEXERS};
pub mod vertica;
use crate::dialect::vertica::matcher::{VERTICA_KEYWORDS, VERTICA_LEXERS};

use crate::matcher::LexMatcher;
use std::str::FromStr;

#[derive(Debug, Eq, PartialEq, Hash, Copy, Clone)]
pub enum Dialect {
    Ansi, Athena, Bigquery, Clickhouse, Databricks, Db2, Doris,
    Duckdb, Exasol, Flink, Greenplum, Hive, Impala, Mariadb,
    Materialize, Mysql, Oracle, Postgres, Redshift, Snowflake, Soql,
    Sparksql, Sqlite, Starrocks, Teradata, Trino, Tsql, Vertica,
}

impl Dialect {
    pub(crate) fn get_reserved_keywords(&self) -> &'static Vec<String> {
        match self {
            Dialect::Ansi => &ANSI_KEYWORDS,
            Dialect::Athena => &ATHENA_KEYWORDS,
            Dialect::Bigquery => &BIGQUERY_KEYWORDS,
            Dialect::Clickhouse => &CLICKHOUSE_KEYWORDS,
            Dialect::Databricks => &DATABRICKS_KEYWORDS,
            Dialect::Db2 => &DB2_KEYWORDS,
            Dialect::Doris => &DORIS_KEYWORDS,
            Dialect::Duckdb => &DUCKDB_KEYWORDS,
            Dialect::Exasol => &EXASOL_KEYWORDS,
            Dialect::Flink => &FLINK_KEYWORDS,
            Dialect::Greenplum => &GREENPLUM_KEYWORDS,
            Dialect::Hive => &HIVE_KEYWORDS,
            Dialect::Impala => &IMPALA_KEYWORDS,
            Dialect::Mariadb => &MARIADB_KEYWORDS,
            Dialect::Materialize => &MATERIALIZE_KEYWORDS,
            Dialect::Mysql => &MYSQL_KEYWORDS,
            Dialect::Oracle => &ORACLE_KEYWORDS,
            Dialect::Postgres => &POSTGRES_KEYWORDS,
            Dialect::Redshift => &REDSHIFT_KEYWORDS,
            Dialect::Snowflake => &SNOWFLAKE_KEYWORDS,
            Dialect::Soql => &SOQL_KEYWORDS,
            Dialect::Sparksql => &SPARKSQL_KEYWORDS,
            Dialect::Sqlite => &SQLITE_KEYWORDS,
            Dialect::Starrocks => &STARROCKS_KEYWORDS,
            Dialect::Teradata => &TERADATA_KEYWORDS,
            Dialect::Trino => &TRINO_KEYWORDS,
            Dialect::Tsql => &TSQL_KEYWORDS,
            Dialect::Vertica => &VERTICA_KEYWORDS,
        }
    }

    pub fn get_lexers(&self) -> &'static Vec<LexMatcher> {
        match self {
            Dialect::Ansi => &ANSI_LEXERS,
            Dialect::Athena => &ATHENA_LEXERS,
            Dialect::Bigquery => &BIGQUERY_LEXERS,
            Dialect::Clickhouse => &CLICKHOUSE_LEXERS,
            Dialect::Databricks => &DATABRICKS_LEXERS,
            Dialect::Db2 => &DB2_LEXERS,
            Dialect::Doris => &DORIS_LEXERS,
            Dialect::Duckdb => &DUCKDB_LEXERS,
            Dialect::Exasol => &EXASOL_LEXERS,
            Dialect::Flink => &FLINK_LEXERS,
            Dialect::Greenplum => &GREENPLUM_LEXERS,
            Dialect::Hive => &HIVE_LEXERS,
            Dialect::Impala => &IMPALA_LEXERS,
            Dialect::Mariadb => &MARIADB_LEXERS,
            Dialect::Materialize => &MATERIALIZE_LEXERS,
            Dialect::Mysql => &MYSQL_LEXERS,
            Dialect::Oracle => &ORACLE_LEXERS,
            Dialect::Postgres => &POSTGRES_LEXERS,
            Dialect::Redshift => &REDSHIFT_LEXERS,
            Dialect::Snowflake => &SNOWFLAKE_LEXERS,
            Dialect::Soql => &SOQL_LEXERS,
            Dialect::Sparksql => &SPARKSQL_LEXERS,
            Dialect::Sqlite => &SQLITE_LEXERS,
            Dialect::Starrocks => &STARROCKS_LEXERS,
            Dialect::Teradata => &TERADATA_LEXERS,
            Dialect::Trino => &TRINO_LEXERS,
            Dialect::Tsql => &TSQL_LEXERS,
            Dialect::Vertica => &VERTICA_LEXERS,
        }
    }
}

impl FromStr for Dialect {
    type Err = ();
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "ansi" => Ok(Dialect::Ansi),
            "athena" => Ok(Dialect::Athena),
            "bigquery" => Ok(Dialect::Bigquery),
            "clickhouse" => Ok(Dialect::Clickhouse),
            "databricks" => Ok(Dialect::Databricks),
            "db2" => Ok(Dialect::Db2),
            "doris" => Ok(Dialect::Doris),
            "duckdb" => Ok(Dialect::Duckdb),
            "exasol" => Ok(Dialect::Exasol),
            "flink" => Ok(Dialect::Flink),
            "greenplum" => Ok(Dialect::Greenplum),
            "hive" => Ok(Dialect::Hive),
            "impala" => Ok(Dialect::Impala),
            "mariadb" => Ok(Dialect::Mariadb),
            "materialize" => Ok(Dialect::Materialize),
            "mysql" => Ok(Dialect::Mysql),
            "oracle" => Ok(Dialect::Oracle),
            "postgres" => Ok(Dialect::Postgres),
            "redshift" => Ok(Dialect::Redshift),
            "snowflake" => Ok(Dialect::Snowflake),
            "soql" => Ok(Dialect::Soql),
            "sparksql" => Ok(Dialect::Sparksql),
            "sqlite" => Ok(Dialect::Sqlite),
            "starrocks" => Ok(Dialect::Starrocks),
            "teradata" => Ok(Dialect::Teradata),
            "trino" => Ok(Dialect::Trino),
            "tsql" => Ok(Dialect::Tsql),
            "vertica" => Ok(Dialect::Vertica),
            _ => Err(()),
        }
    }
}
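Taken together, this module means a dialect is looked up by the same lowercase names SQLFluff itself uses, with each dialect mapping to lazily-initialised static keyword and lexer tables. A short in-crate usage sketch (an illustration, not part of the generated file; note `get_reserved_keywords` is `pub(crate)`, so this only compiles inside the sqlfluffrs crate, and the matchers are presumably tried in the order they are defined):

use std::str::FromStr;
use crate::dialect::Dialect;

fn demo() {
    let dialect = Dialect::from_str("flink").expect("known dialect name");
    // The full, ordered matcher table for the dialect.
    assert!(!dialect.get_lexers().is_empty());
    // The reserved-keyword list backing the matching *_KEYWORDS static.
    assert!(dialect.get_reserved_keywords().iter().any(|k| k == "SELECT"));
}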
1305
sqlfluffrs/src/dialect/mysql/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/mysql/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1134
sqlfluffrs/src/dialect/oracle/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/oracle/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1312
sqlfluffrs/src/dialect/postgres/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/postgres/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1367
sqlfluffrs/src/dialect/redshift/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/redshift/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1229
sqlfluffrs/src/dialect/snowflake/matcher.rs
Normal file
1229
sqlfluffrs/src/dialect/snowflake/matcher.rs
Normal file
File diff suppressed because it is too large
Load Diff
5
sqlfluffrs/src/dialect/snowflake/mod.rs
Normal file
5
sqlfluffrs/src/dialect/snowflake/mod.rs
Normal file
@@ -0,0 +1,5 @@
|
||||
/* This is a generated file! */
|
||||
|
||||
#[allow(clippy::needless_raw_string_hashes)]
|
||||
pub mod matcher;
|
||||
// pub mod parser;
|
||||
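Each generated `matcher.rs` below holds its keyword and matcher tables in `once_cell::sync::Lazy` statics, so each table is built exactly once, on first access. A self-contained sketch of that pattern (the static and its contents here are illustrative, not taken from the generated files):

use once_cell::sync::Lazy;

// The closure runs on first dereference; later accesses reuse the same Vec.
static KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| {
    vec!["SELECT".to_string(), "FROM".to_string()]
});

fn main() {
    assert_eq!(KEYWORDS.len(), 2);
    assert!(KEYWORDS.iter().any(|k| k == "SELECT"));
}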
978 sqlfluffrs/src/dialect/soql/matcher.rs Normal file
@@ -0,0 +1,978 @@
/* This is a generated file! */

use once_cell::sync::Lazy;
use crate::matcher::{LexMatcher, extract_nested_block_comment};
use crate::token::Token;
use crate::token::config::TokenConfig;
use crate::regex::RegexModeGroup;
use crate::dialect::Dialect;
use hashbrown::HashSet;

pub static SOQL_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
    "CASE".to_string(),
    "CROSS".to_string(),
    "FULL".to_string(),
    "IGNORE".to_string(),
    "INNER".to_string(),
    "INTERVAL".to_string(),
    "JOIN".to_string(),
    "LAST_90_DAYS".to_string(),
    "LAST_FISCAL_QUARTER".to_string(),
    "LAST_FISCAL_YEAR".to_string(),
    "LAST_MONTH".to_string(),
    "LAST_N_DAYS".to_string(),
    "LAST_N_FISCAL_QUARTERS".to_string(),
    "LAST_N_FISCAL_YEARS".to_string(),
    "LAST_N_MONTHS".to_string(),
    "LAST_N_QUARTERS".to_string(),
    "LAST_N_WEEKS".to_string(),
    "LAST_N_YEARS".to_string(),
    "LAST_QUARTER".to_string(),
    "LAST_WEEK".to_string(),
    "LAST_YEAR".to_string(),
    "LEFT".to_string(),
    "NATURAL".to_string(),
    "NEXT_90_DAYS".to_string(),
    "NEXT_FISCAL_QUARTER".to_string(),
    "NEXT_FISCAL_YEAR".to_string(),
    "NEXT_MONTH".to_string(),
    "NEXT_N_DAYS".to_string(),
    "NEXT_N_FISCAL_QUARTERS".to_string(),
    "NEXT_N_FISCAL_YEARS".to_string(),
    "NEXT_N_MONTHS".to_string(),
    "NEXT_N_QUARTERS".to_string(),
    "NEXT_N_WEEKS".to_string(),
    "NEXT_N_YEARS".to_string(),
    "NEXT_QUARTER".to_string(),
    "NEXT_WEEK".to_string(),
    "NEXT_YEAR".to_string(),
    "NOT".to_string(),
    "NULL".to_string(),
    "ON".to_string(),
    "ORDER".to_string(),
    "OUTER".to_string(),
    "PARTITION".to_string(),
    "RESPECT".to_string(),
    "RIGHT".to_string(),
    "ROWS".to_string(),
    "SELECT".to_string(),
    "SET".to_string(),
    "THIS_FISCAL_QUARTER".to_string(),
    "THIS_FISCAL_YEAR".to_string(),
    "THIS_MONTH".to_string(),
    "THIS_QUARTER".to_string(),
    "THIS_WEEK".to_string(),
    "THIS_YEAR".to_string(),
    "TODAY".to_string(),
    "TOMORROW".to_string(),
    "UNION".to_string(),
    "USING".to_string(),
    "YESTERDAY".to_string(),
]});

pub static SOQL_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "whitespace",
        r#"[^\S\r\n]+"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::whitespace_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "inline_comment",
        r#"(--|#)[^\n]*"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        Some(vec![String::from("--"), String::from("#")]),
        None,
        None,
        None,
        None,
        None,
        |input| input.starts_with(['#','-','/']),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "block_comment",
        r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        Some(Box::new(
            LexMatcher::regex_subdivider(
                Dialect::Soql,
                "newline",
                r#"\r\n|\n"#,
                |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
                 quoted_value, escape_replacement, casefold| {
                    Token::newline_token(raw, pos_marker, TokenConfig {
                        class_types, instance_types, trim_start, trim_chars,
                        quoted_value, escape_replacement, casefold,
                    })
                },
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                |_| true,
                None,
            ))),
        Some(Box::new(
            LexMatcher::regex_subdivider(
                Dialect::Soql,
                "whitespace",
                r#"[^\S\r\n]+"#,
                |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
                 quoted_value, escape_replacement, casefold| {
                    Token::whitespace_token(raw, pos_marker, TokenConfig {
                        class_types, instance_types, trim_start, trim_chars,
                        quoted_value, escape_replacement, casefold,
                    })
                },
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                |_| true,
                None,
            ))),
        None,
        None,
        None,
        None,
        None,
        Some(extract_nested_block_comment),
        |input| input.starts_with("/"),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "single_quote",
        r#"'([^'\\]|\\.|'')*'"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        Some((r#"'((?:[^'\\]|\\.|'')*)'"#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"\\'|''"#.to_string(), r#"'"#.to_string())),
        None,
        None,
        |input| match input.as_bytes() {
            [b'\'', ..] => true, // Single quote case
            [b'R' | b'r', b'\'', ..] => true, // r' or R'
            [b'B' | b'b', b'\'', ..] => true, // b' or B'
            [b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
            [b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
            _ => false,
        },
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "double_quote",
        r#""(""|[^"\\]|\\.)*""#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        Some((r#""((?:[^"\\]|\\.)*)""#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"\\"|"""#.to_string(), r#"""#.to_string())),
        None,
        None,
        |input| match input.as_bytes() {
            [b'"', ..] => true, // Just a double quote
            [b'R' | b'r', b'"', ..] => true, // r" or R"
            [b'B' | b'b', b'"', ..] => true, // b" or B"
            [b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
            [b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
            _ => false,
        },
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "back_quote",
        r#"`(?:[^`\\]|\\.)*`"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        Some((r#"`((?:[^`\\]|\\.)*)`"#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"\\`"#.to_string(), r#"`"#.to_string())),
        None,
        None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "dollar_quote",
        r#"\$(\w*)\$(.*?)\$\1\$"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
        None,
        None,
        None,
        |input| input.starts_with("$"),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "datetime_literal",
        r#"[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(Z|(\+|\-)[0-9]{2}:[0-9]{2})"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "date_literal",
        r#"[0-9]{4}-[0-9]{2}-[0-9]{2}"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "numeric_literal",
        r#"(?>\d+\.\d+|\d+\.(?![\.\w])|\.\d+|\d+)(\.?[eE][+-]?\d+)?((?<=\.)|(?=\b))"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::literal_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |input| input.starts_with(['x','X','.','0','1','2','3','4','5','6','7','8','9']),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "obevo_annotation",
        r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "glob_operator",
        "~~~",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comparison_operator_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "like_operator",
        r#"!?~~?\*?"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comparison_operator_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "newline",
        r#"\r\n|\n"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::newline_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "casting_operator",
        "::",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "equals",
        "=",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "greater_than",
        ">",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "less_than",
        "<",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "not",
        "!",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "dot",
        ".",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "comma",
        ",",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "plus",
        "+",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "minus",
        "-",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "divide",
        "/",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "percent",
        "%",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "question",
        "?",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "ampersand",
        "&",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "vertical_bar",
        "|",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "caret",
        "^",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "star",
        "*",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "start_bracket",
        "(",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "end_bracket",
        ")",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "start_square_bracket",
        "[",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "end_square_bracket",
        "]",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "start_curly_bracket",
        "{",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "end_curly_bracket",
        "}",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "colon",
        ":",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Soql,
        "semicolon",
        ";",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Soql,
        "word",
        r#"[0-9a-zA-Z_]+"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::word_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |_| true,
        None,
    ),

]});
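Note the closing closure argument on each matcher above (e.g. `|input| input.starts_with(['#','-','/'])`): it reads as a cheap first-character guard that can reject a matcher before any regex work happens. A speculative, self-contained sketch of how such guards could act as a pre-filter; this is not the crate's actual API, just the idea:

// Illustrative only: pair a cheap guard with a matcher name, and skip
// matchers whose guard rejects the upcoming input.
struct Matcher {
    name: &'static str,
    guard: fn(&str) -> bool,
}

fn first_candidate<'a>(matchers: &'a [Matcher], input: &str) -> Option<&'a Matcher> {
    matchers.iter().find(|m| (m.guard)(input))
}

fn main() {
    let matchers = [
        Matcher { name: "inline_comment", guard: |s| s.starts_with(['#', '-', '/']) },
        Matcher { name: "word", guard: |_| true },
    ];
    assert_eq!(first_candidate(&matchers, "-- comment").unwrap().name, "inline_comment");
    assert_eq!(first_candidate(&matchers, "abc").unwrap().name, "word");
}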
5 sqlfluffrs/src/dialect/soql/mod.rs Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

1149 sqlfluffrs/src/dialect/sparksql/matcher.rs Normal file
File diff suppressed because it is too large

5 sqlfluffrs/src/dialect/sparksql/mod.rs Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

1151 sqlfluffrs/src/dialect/sqlite/matcher.rs Normal file
File diff suppressed because it is too large

5 sqlfluffrs/src/dialect/sqlite/mod.rs Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

1304 sqlfluffrs/src/dialect/starrocks/matcher.rs Normal file
File diff suppressed because it is too large

5 sqlfluffrs/src/dialect/starrocks/mod.rs Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

898 sqlfluffrs/src/dialect/teradata/matcher.rs Normal file
@@ -0,0 +1,898 @@
/* This is a generated file! */

use once_cell::sync::Lazy;
use crate::matcher::{LexMatcher, extract_nested_block_comment};
use crate::token::Token;
use crate::token::config::TokenConfig;
use crate::regex::RegexModeGroup;
use crate::dialect::Dialect;
use hashbrown::HashSet;

pub static TERADATA_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
    "CASE".to_string(),
    "CROSS".to_string(),
    "FULL".to_string(),
    "IGNORE".to_string(),
    "INNER".to_string(),
    "INTERVAL".to_string(),
    "JOIN".to_string(),
    "LEFT".to_string(),
    "LOCKING".to_string(),
    "NATURAL".to_string(),
    "NOT".to_string(),
    "NULL".to_string(),
    "ON".to_string(),
    "ORDER".to_string(),
    "OUTER".to_string(),
    "PARTITION".to_string(),
    "REPLACE".to_string(),
    "RESPECT".to_string(),
    "RIGHT".to_string(),
    "ROWS".to_string(),
    "SELECT".to_string(),
    "SET".to_string(),
    "TIMESTAMP".to_string(),
    "UNION".to_string(),
    "USING".to_string(),
]});

pub static TERADATA_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![

    LexMatcher::regex_lexer(
        Dialect::Teradata,
        "whitespace",
        r#"[^\S\r\n]+"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::whitespace_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Teradata,
        "inline_comment",
        r#"(--|#)[^\n]*"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        Some(vec![String::from("--"), String::from("#")]),
        None,
        None,
        None,
        None,
        None,
        |input| input.starts_with(['#','-','/']),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Teradata,
        "block_comment",
        r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        Some(Box::new(
            LexMatcher::regex_subdivider(
                Dialect::Teradata,
                "newline",
                r#"\r\n|\n"#,
                |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
                 quoted_value, escape_replacement, casefold| {
                    Token::newline_token(raw, pos_marker, TokenConfig {
                        class_types, instance_types, trim_start, trim_chars,
                        quoted_value, escape_replacement, casefold,
                    })
                },
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                |_| true,
                None,
            ))),
        Some(Box::new(
            LexMatcher::regex_subdivider(
                Dialect::Teradata,
                "whitespace",
                r#"[^\S\r\n]+"#,
                |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
                 quoted_value, escape_replacement, casefold| {
                    Token::whitespace_token(raw, pos_marker, TokenConfig {
                        class_types, instance_types, trim_start, trim_chars,
                        quoted_value, escape_replacement, casefold,
                    })
                },
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                None,
                |_| true,
                None,
            ))),
        None,
        None,
        None,
        None,
        None,
        Some(extract_nested_block_comment),
        |input| input.starts_with("/"),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Teradata,
        "single_quote",
        r#"'([^'\\]|\\.|'')*'"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        Some((r#"'((?:[^'\\]|\\.|'')*)'"#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"\\'|''"#.to_string(), r#"'"#.to_string())),
        None,
        None,
        |input| match input.as_bytes() {
            [b'\'', ..] => true, // Single quote case
            [b'R' | b'r', b'\'', ..] => true, // r' or R'
            [b'B' | b'b', b'\'', ..] => true, // b' or B'
            [b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
            [b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
            _ => false,
        },
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Teradata,
        "double_quote",
        r#""(""|[^"\\]|\\.)*""#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        Some((r#""((?:[^"\\]|\\.)*)""#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"\\"|"""#.to_string(), r#"""#.to_string())),
        None,
        None,
        |input| match input.as_bytes() {
            [b'"', ..] => true, // Just a double quote
            [b'R' | b'r', b'"', ..] => true, // r" or R"
            [b'B' | b'b', b'"', ..] => true, // b" or B"
            [b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
            [b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
            _ => false,
        },
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Teradata,
        "back_quote",
        r#"`(?:[^`\\]|\\.)*`"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        Some((r#"`((?:[^`\\]|\\.)*)`"#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"\\`"#.to_string(), r#"`"#.to_string())),
        None,
        None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Teradata,
        "dollar_quote",
        r#"\$(\w*)\$(.*?)\$\1\$"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
        None,
        None,
        None,
        |input| input.starts_with("$"),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Teradata,
        "numeric_literal",
        r#"([0-9]+(\.[0-9]*)?)"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |input| input.starts_with(['x','X','.','0','1','2','3','4','5','6','7','8','9']),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Teradata,
        "obevo_annotation",
        r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "glob_operator",
        "~~~",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comparison_operator_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Teradata,
        "like_operator",
        r#"!?~~?\*?"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comparison_operator_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Teradata,
        "newline",
        r#"\r\n|\n"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::newline_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "casting_operator",
        "::",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "equals",
        "=",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "greater_than",
        ">",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "less_than",
        "<",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "not",
        "!",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "dot",
        ".",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "comma",
        ",",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "plus",
        "+",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "minus",
        "-",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "divide",
        "/",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "percent",
        "%",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "question",
        "?",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "ampersand",
        "&",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "vertical_bar",
        "|",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "caret",
        "^",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "star",
        "*",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "start_bracket",
        "(",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "end_bracket",
        ")",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "start_square_bracket",
        "[",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "end_square_bracket",
        "]",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "start_curly_bracket",
        "{",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "end_curly_bracket",
        "}",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "colon",
        ":",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Teradata,
        "semicolon",
        ";",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Teradata,
        "word",
        r#"[0-9a-zA-Z_]+"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::word_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        None,
        |_| true,
        None,
    ),

]});
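Unlike the SOQL file above, Teradata's `numeric_literal` uses the much simpler pattern `([0-9]+(\.[0-9]*)?)`. A quick check of what that pattern accepts, sketched here with the `regex` crate for illustration (an assumption; the generated code goes through the crate's own regex wrapper instead):

use regex::Regex;

fn main() {
    // Anchored at the start to mimic lexing from the current position.
    let re = Regex::new(r"^([0-9]+(\.[0-9]*)?)").unwrap();
    for (input, expected) in [("42", "42"), ("3.14 rest", "3.14"), ("7.", "7.")] {
        let m = re.find(input).expect("should match");
        assert_eq!(m.as_str(), expected);
    }
}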
5 sqlfluffrs/src/dialect/teradata/mod.rs Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;

998 sqlfluffrs/src/dialect/trino/matcher.rs Normal file
@@ -0,0 +1,998 @@
|
||||
/* This is a generated file! */
|
||||
use once_cell::sync::Lazy;
|
||||
use crate::matcher::{LexMatcher, extract_nested_block_comment};
|
||||
use crate::token::Token;
|
||||
use crate::token::config::TokenConfig;
|
||||
use crate::regex::RegexModeGroup;
|
||||
use crate::dialect::Dialect;
|
||||
use hashbrown::HashSet;
|
||||
|
||||
pub static TRINO_KEYWORDS: Lazy<Vec<String>> = Lazy::new(|| { vec![
|
||||
"ALTER".to_string(),
|
||||
"AND".to_string(),
|
||||
"AS".to_string(),
|
||||
"BETWEEN".to_string(),
|
||||
"BY".to_string(),
|
||||
"CASE".to_string(),
|
||||
"CAST".to_string(),
|
||||
"CONSTRAINT".to_string(),
|
||||
"CREATE".to_string(),
|
||||
"CROSS".to_string(),
|
||||
"CUBE".to_string(),
|
||||
"CURRENT_CATALOG".to_string(),
|
||||
"CURRENT_DATE".to_string(),
|
||||
"CURRENT_PATH".to_string(),
|
||||
"CURRENT_ROLE".to_string(),
|
||||
"CURRENT_SCHEMA".to_string(),
|
||||
"CURRENT_TIME".to_string(),
|
||||
"CURRENT_TIMESTAMP".to_string(),
|
||||
"CURRENT_USER".to_string(),
|
||||
"DEALLOCATE".to_string(),
|
||||
"DELETE".to_string(),
|
||||
"DESCRIBE".to_string(),
|
||||
"DISTINCT".to_string(),
|
||||
"DROP".to_string(),
|
||||
"ELSE".to_string(),
|
||||
"END".to_string(),
|
||||
"ESCAPE".to_string(),
|
||||
"EXCEPT".to_string(),
|
||||
"EXECUTE".to_string(),
|
||||
"EXISTS".to_string(),
|
||||
"EXTRACT".to_string(),
|
||||
"FALSE".to_string(),
|
||||
"FOR".to_string(),
|
||||
"FROM".to_string(),
|
||||
"FULL".to_string(),
|
||||
"FUNCTION".to_string(),
|
||||
"GROUP".to_string(),
|
||||
"GROUPING".to_string(),
|
||||
"HAVING".to_string(),
|
||||
"IN".to_string(),
|
||||
"INNER".to_string(),
|
||||
"INSERT".to_string(),
|
||||
"INTERSECT".to_string(),
|
||||
"INTO".to_string(),
|
||||
"IS".to_string(),
|
||||
"JOIN".to_string(),
|
||||
"JSON_ARRAY".to_string(),
|
||||
"JSON_EXISTS".to_string(),
|
||||
"JSON_OBJECT".to_string(),
|
||||
"JSON_QUERY".to_string(),
|
||||
"JSON_TABLE".to_string(),
|
||||
"JSON_VALUE".to_string(),
|
||||
"LEFT".to_string(),
|
||||
"LIKE".to_string(),
|
||||
"LISTAGG".to_string(),
|
||||
"LOCALTIME".to_string(),
|
||||
"LOCALTIMESTAMP".to_string(),
|
||||
"NATURAL".to_string(),
|
||||
"NORMALIZE".to_string(),
|
||||
"NOT".to_string(),
|
||||
"NULL".to_string(),
|
||||
"ON".to_string(),
|
||||
"OR".to_string(),
|
||||
"ORDER".to_string(),
|
||||
"OUTER".to_string(),
|
||||
"PREPARE".to_string(),
|
||||
"RECURSIVE".to_string(),
|
||||
"RIGHT".to_string(),
|
||||
"ROLLUP".to_string(),
|
||||
"SELECT".to_string(),
|
||||
"SKIP".to_string(),
|
||||
"TABLE".to_string(),
|
||||
"THEN".to_string(),
|
||||
"TRIM".to_string(),
|
||||
"TRUE".to_string(),
|
||||
"UESCAPE".to_string(),
|
||||
"UNION".to_string(),
|
||||
"UNNEST".to_string(),
|
||||
"USING".to_string(),
|
||||
"VALUES".to_string(),
|
||||
"WHEN".to_string(),
|
||||
"WHERE".to_string(),
|
||||
"WITH".to_string(),
|
||||
]});
|
||||
|
||||
pub static TRINO_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![
|
||||
|
||||
LexMatcher::regex_lexer(
|
||||
Dialect::Trino,
|
||||
"whitespace",
|
||||
r#"[^\S\r\n]+"#,
|
||||
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold| {
|
||||
Token::whitespace_token(raw, pos_marker, TokenConfig {
|
||||
class_types, instance_types, trim_start, trim_chars,
|
||||
quoted_value, escape_replacement, casefold,
|
||||
})
|
||||
},
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
|_| true,
|
||||
None,
|
||||
),

    LexMatcher::regex_lexer(
        Dialect::Trino,
        "inline_comment",
        r#"(--|#)[^\n]*"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None,
        None,
        Some(vec![String::from("--"), String::from("#")]),
        None, None, None, None, None,
        |input| input.starts_with(['#', '-', '/']),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Trino,
        "block_comment",
        r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        Some(Box::new(LexMatcher::regex_subdivider(
            Dialect::Trino,
            "newline",
            r#"\r\n|\n"#,
            |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
             quoted_value, escape_replacement, casefold| {
                Token::newline_token(raw, pos_marker, TokenConfig {
                    class_types, instance_types, trim_start, trim_chars,
                    quoted_value, escape_replacement, casefold,
                })
            },
            None, None, None, None, None, None, None, None,
            |_| true,
            None,
        ))),
        Some(Box::new(LexMatcher::regex_subdivider(
            Dialect::Trino,
            "whitespace",
            r#"[^\S\r\n]+"#,
            |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
             quoted_value, escape_replacement, casefold| {
                Token::whitespace_token(raw, pos_marker, TokenConfig {
                    class_types, instance_types, trim_start, trim_chars,
                    quoted_value, escape_replacement, casefold,
                })
            },
            None, None, None, None, None, None, None, None,
            |_| true,
            None,
        ))),
        None, None, None, None, None,
        Some(extract_nested_block_comment),
        |input| input.starts_with("/"),
        None,
    ),
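
    // The quoted-literal matchers below also carry a `quoted_value` capture
    // pattern (to extract the inner value) and an `escape_replacement` pair
    // (to unescape it), plus a byte-level precheck for optional r/b prefixes.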
    LexMatcher::regex_lexer(
        Dialect::Trino,
        "single_quote",
        r#"'([^'\\]|\\.|'')*'"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#"'((?:[^'\\]|\\.|'')*)'"#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"\\'|''"#.to_string(), r#"'"#.to_string())),
        None, None,
        |input| match input.as_bytes() {
            [b'\'', ..] => true,                           // Single quote case
            [b'R' | b'r', b'\'', ..] => true,              // r' or R'
            [b'B' | b'b', b'\'', ..] => true,              // b' or B'
            [b'R' | b'r', b'B' | b'b', b'\'', ..] => true, // rb', RB', etc.
            [b'B' | b'b', b'R' | b'r', b'\'', ..] => true, // br', Br', etc.
            _ => false,
        },
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Trino,
        "double_quote",
        r#""([^"]|"")*""#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#""((?:[^"]|"")*)""#.to_string(), RegexModeGroup::Index(1))),
        Some((r#""""#.to_string(), r#"""#.to_string())),
        None, None,
        |input| match input.as_bytes() {
            [b'"', ..] => true,                           // Just a double quote
            [b'R' | b'r', b'"', ..] => true,              // r" or R"
            [b'B' | b'b', b'"', ..] => true,              // b" or B"
            [b'R' | b'r', b'B' | b'b', b'"', ..] => true, // rb", RB", etc.
            [b'B' | b'b', b'R' | b'r', b'"', ..] => true, // br", Br", etc.
            _ => false,
        },
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Trino,
        "back_quote",
        r#"`(?:[^`\\]|\\.)*`"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#"`((?:[^`\\]|\\.)*)`"#.to_string(), RegexModeGroup::Index(1))),
        Some((r#"\\`"#.to_string(), r#"`"#.to_string())),
        None, None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Trino,
        "dollar_quote",
        r#"\$(\w*)\$(.*?)\$\1\$"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None,
        Some((r#"\$(\w*)\$(.*?)\$\1\$"#.to_string(), RegexModeGroup::Index(2))),
        None, None, None,
        |input| input.starts_with("$"),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Trino,
        "numeric_literal",
        r#"(?>\d+\.\d+|\d+\.(?![\.\w])|\.\d+|\d+)(\.?[eE][+-]?\d+)?((?<=\.)|(?=\b))"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::literal_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |input| input.starts_with(['x', 'X', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']),
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Trino,
        "obevo_annotation",
        r#"////\s*(CHANGE|BODY|METADATA)[^\n]*"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comment_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "glob_operator",
        "~~~",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comparison_operator_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "right_arrow",
        "->",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "fat_right_arrow",
        "=>",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Trino,
        "like_operator",
        r#"!?~~?\*?"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::comparison_operator_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Trino,
        "newline",
        r#"\r\n|\n"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::newline_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "casting_operator",
        "::",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "equals",
        "=",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "greater_than",
        ">",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "less_than",
        "<",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "not",
        "!",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "dot",
        ".",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "comma",
        ",",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "plus",
        "+",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "minus",
        "-",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "divide",
        "/",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "percent",
        "%",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "question",
        "?",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "ampersand",
        "&",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "vertical_bar",
        "|",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "caret",
        "^",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "star",
        "*",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "start_bracket",
        "(",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "end_bracket",
        ")",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "start_square_bracket",
        "[",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "end_square_bracket",
        "]",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "start_curly_bracket",
        "{",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "end_curly_bracket",
        "}",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "colon",
        ":",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::string_lexer(
        Dialect::Trino,
        "semicolon",
        ";",
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::code_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
    ),

    LexMatcher::regex_lexer(
        Dialect::Trino,
        "word",
        r#"[0-9a-zA-Z_]+"#,
        |raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
         quoted_value, escape_replacement, casefold| {
            Token::word_token(raw, pos_marker, TokenConfig {
                class_types, instance_types, trim_start, trim_chars,
                quoted_value, escape_replacement, casefold,
            })
        },
        None, None, None, None, None, None, None, None,
        |_| true,
        None,
    ),

]});
5
sqlfluffrs/src/dialect/trino/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1220
sqlfluffrs/src/dialect/tsql/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/tsql/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1058
sqlfluffrs/src/dialect/vertica/matcher.rs
Normal file
File diff suppressed because it is too large
5
sqlfluffrs/src/dialect/vertica/mod.rs
Normal file
@@ -0,0 +1,5 @@
/* This is a generated file! */

#[allow(clippy::needless_raw_string_hashes)]
pub mod matcher;
// pub mod parser;
1335
sqlfluffrs/src/lexer.rs
Normal file
File diff suppressed because it is too large
14
sqlfluffrs/src/lib.rs
Normal file
@@ -0,0 +1,14 @@
pub mod config;
pub mod dialect;
pub mod lexer;
pub mod marker;
pub mod matcher;
#[cfg(feature = "python")]
pub mod python;
pub mod regex;
pub mod slice;
pub mod templater;
pub mod token;
// include!(concat!(env!("OUT_DIR"), "/dialect_matcher.rs"));

use crate::dialect::Dialect;
491
sqlfluffrs/src/marker.rs
Normal file
@@ -0,0 +1,491 @@
use hashbrown::HashMap;
use std::cmp::Ordering;
use std::fmt::Display;
use std::sync::Arc;

use crate::slice::Slice;
use crate::templater::templatefile::TemplatedFile;

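/// A marker for a position in both the raw source file and the templated
/// file, along with a "working" line/position that is advanced as tokens
/// are emitted.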
#[derive(Debug, Clone)]
pub struct PositionMarker {
    pub source_slice: Slice,
    pub templated_slice: Slice,
    pub templated_file: Arc<TemplatedFile>,
    pub working_line_no: usize,
    pub working_line_pos: usize,
}

impl PositionMarker {
    #[must_use]
    pub fn new(
        source_slice: Slice,
        templated_slice: Slice,
        templated_file: &Arc<TemplatedFile>,
        working_line_no: Option<usize>,
        working_line_pos: Option<usize>,
    ) -> Self {
        let (working_line_no, working_line_pos) = match (working_line_no, working_line_pos) {
            (Some(working_line_no), Some(working_line_pos)) => (working_line_no, working_line_pos),
            _ => templated_file.get_line_pos_of_char_pos(source_slice.start, false),
        };

        Self {
            source_slice,
            templated_slice,
            templated_file: Arc::clone(templated_file),
            working_line_no,
            working_line_pos,
        }
    }

    #[must_use]
    pub fn working_loc(&self) -> (usize, usize) {
        (self.working_line_no, self.working_line_pos)
    }

    #[must_use]
    pub fn working_loc_after(&self, raw: &str) -> (usize, usize) {
        // Infer next position based on the raw string
        self.infer_next_position(raw, self.working_line_no, self.working_line_pos)
    }

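    /// Work out the (line, position) reached after appending `raw` at
    /// (`line_no`, `line_pos`): e.g. appending "ab\ncd" at (1, 5) gives (2, 3).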
    #[must_use]
    pub fn infer_next_position(
        &self,
        raw: &str,
        line_no: usize,
        line_pos: usize,
    ) -> (usize, usize) {
        // If `raw` spans multiple lines, advance the line number by the number
        // of newlines and restart the position on the final line; otherwise
        // just advance the position by the string length.
        let lines: Vec<&str> = raw.split('\n').collect();
        if lines.len() > 1 {
            let num_lines: usize = lines.len();
            let last_line_len: usize = lines.last().unwrap().len();
            (line_no + num_lines - 1, last_line_len + 1)
        } else {
            let first_line_len: usize = raw.len();
            (line_no, line_pos + first_line_len)
        }
    }

    #[must_use]
    pub fn source_position(&self) -> (usize, usize) {
        self.templated_file
            .get_line_pos_of_char_pos(self.source_slice.start, true)
    }

    #[must_use]
    pub fn templated_position(&self) -> (usize, usize) {
        self.templated_file
            .get_line_pos_of_char_pos(self.source_slice.start, false)
    }

    #[must_use]
    pub fn line_no(&self) -> usize {
        self.source_position().0
    }

    #[must_use]
    pub fn line_pos(&self) -> usize {
        self.source_position().1
    }

    #[must_use]
    pub fn to_source_string(&self) -> String {
        let (line, pos) = self.source_position();
        format!("[L:{line:3}, P:{pos:3}]")
    }

    #[must_use]
    pub fn start_point_marker(&self) -> Self {
        PositionMarker::from_point(
            self.source_slice.start,
            self.templated_slice.start,
            &self.templated_file,
            Some(self.working_line_no),
            Some(self.working_line_pos),
        )
    }

    #[must_use]
    pub fn end_point_marker(&self) -> Self {
        PositionMarker::from_point(
            self.source_slice.stop,
            self.templated_slice.stop,
            &self.templated_file,
            None,
            None,
        )
    }

    #[must_use]
    pub fn is_point(&self) -> bool {
        slice_is_point(&self.source_slice) && slice_is_point(&self.templated_slice)
    }

    #[must_use]
    pub fn with_working_position(&self, line_no: usize, line_pos: usize) -> Self {
        PositionMarker {
            working_line_no: line_no,
            working_line_pos: line_pos,
            ..self.clone()
        }
    }

    #[must_use]
    pub fn is_literal(&self) -> bool {
        self.templated_file
            .is_source_slice_literal(&self.source_slice)
    }

    #[must_use]
    pub fn source_str(&self) -> String {
        self.templated_file
            .source_str
            .chars()
            .skip(self.source_slice.start)
            .take(self.source_slice.len())
            .collect::<String>()
    }

    #[must_use]
    pub fn to_source_dict(&self) -> HashMap<String, usize> {
        self.templated_file
            .source_position_dict_from_slice(&self.source_slice)
    }

    #[must_use]
    pub fn from_point(
        source_point: usize,
        templated_point: usize,
        templated_file: &Arc<TemplatedFile>,
        working_line_no: Option<usize>,
        working_line_pos: Option<usize>,
    ) -> Self {
        let source_slice = Slice::from(source_point..source_point);
        let templated_slice = Slice::from(templated_point..templated_point);

        PositionMarker::new(
            source_slice,
            templated_slice,
            templated_file,
            working_line_no,
            working_line_pos,
        )
    }

    #[must_use]
    pub fn from_points(start_marker: &PositionMarker, end_marker: &PositionMarker) -> Self {
        if start_marker.templated_file != end_marker.templated_file {
            panic!("Markers must refer to the same templated file.");
        }

        PositionMarker::new(
            start_marker.source_slice,
            start_marker.templated_slice,
            &start_marker.templated_file,
            Some(start_marker.working_line_no),
            Some(start_marker.working_line_pos),
        )
    }

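    /// Build a marker spanning the envelope (minimum start to maximum stop) of
    /// all the given child markers. Panics if they reference different files.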
    #[must_use]
    pub fn from_child_markers(markers: &[Option<PositionMarker>]) -> Self {
        let mut source_start = usize::MAX;
        let mut source_stop = usize::MIN;
        let mut templated_start = usize::MAX;
        let mut templated_stop = usize::MIN;

        let mut templated_file = None;

        for marker in markers.iter().filter_map(|m| m.as_ref()) {
            source_start = source_start.min(marker.source_slice.start);
            source_stop = source_stop.max(marker.source_slice.stop);
            templated_start = templated_start.min(marker.templated_slice.start);
            templated_stop = templated_stop.max(marker.templated_slice.stop);

            if templated_file.is_none() {
                templated_file = Some(marker.templated_file.clone());
            }
            if templated_file.as_ref() != Some(&marker.templated_file) {
                panic!("Markers must refer to the same templated file.");
            }
        }

        let source_slice = Slice::from(source_start..source_stop);
        let templated_slice = Slice::from(templated_start..templated_stop);

        PositionMarker::new(
            source_slice,
            templated_slice,
            &templated_file.unwrap(),
            None,
            None,
        )
    }
}

impl Eq for PositionMarker {}

impl PartialEq for PositionMarker {
    fn eq(&self, other: &Self) -> bool {
        self.working_loc() == other.working_loc()
    }
}

impl PartialOrd for PositionMarker {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

impl Ord for PositionMarker {
    fn cmp(&self, other: &Self) -> Ordering {
        self.working_loc().cmp(&other.working_loc())
    }
}

impl Display for PositionMarker {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.to_source_string())
    }
}

#[must_use]
pub fn slice_is_point(test_slice: &Slice) -> bool {
    test_slice.start == test_slice.stop
}

#[cfg(feature = "python")]
pub mod python {
    use std::{fmt::Display, sync::Arc};

    use hashbrown::HashMap;
    use pyo3::{prelude::*, types::PyType};

    use crate::{
        slice::Slice,
        templater::templatefile::{
            python::{PySqlFluffTemplatedFile, PyTemplatedFile},
            TemplatedFile,
        },
    };

    use super::PositionMarker;

    #[pyclass(name = "RsPositionMarker", str, eq, ord, frozen, module = "sqlfluffrs")]
    #[repr(transparent)]
    #[derive(Debug, Clone)]
    pub struct PyPositionMarker(pub PositionMarker);

    #[pymethods]
    impl PyPositionMarker {
        #[getter]
        pub fn source_slice(&self) -> Slice {
            self.0.source_slice
        }

        #[getter]
        pub fn templated_slice(&self) -> Slice {
            self.0.templated_slice
        }

        // #[getter]
        // pub fn templated_file(&self) -> PySqlFluffTemplatedFile {
        //     dbg!("templated file from PositionMarker");
        //     PySqlFluffTemplatedFile(PyTemplatedFile::from(self.0.templated_file.clone()))
        // }

        #[getter]
        pub fn templated_file(&self) -> PyTemplatedFile {
            PyTemplatedFile(self.0.templated_file.clone())
        }

        #[getter]
        pub fn working_line_no(&self) -> usize {
            self.0.working_line_no
        }

        #[getter]
        pub fn working_line_pos(&self) -> usize {
            self.0.working_line_pos
        }

        #[getter]
        pub fn working_loc(&self) -> (usize, usize) {
            (self.0.working_line_no, self.0.working_line_pos)
        }

        pub fn start_point_marker(&self) -> Self {
            Self(self.0.start_point_marker())
        }

        pub fn end_point_marker(&self) -> Self {
            Self(self.0.end_point_marker())
        }

        pub fn source_position(&self) -> (usize, usize) {
            self.0.source_position()
        }

        pub fn templated_position(&self) -> (usize, usize) {
            self.0.templated_position()
        }

        pub fn is_literal(&self) -> bool {
            self.0.is_literal()
        }

        pub fn with_working_position(&self, line_no: usize, line_pos: usize) -> Self {
            Self(self.0.with_working_position(line_no, line_pos))
        }

        pub fn infer_next_position(
            &self,
            raw: &str,
            line_no: usize,
            line_pos: usize,
        ) -> (usize, usize) {
            self.0.infer_next_position(raw, line_no, line_pos)
        }

        pub fn line_no(&self) -> usize {
            self.0.line_no()
        }

        pub fn line_pos(&self) -> usize {
            self.0.line_pos()
        }

        pub fn source_str(&self) -> String {
            self.0.source_str()
        }

        pub fn to_source_dict(&self) -> HashMap<String, usize> {
            self.0.to_source_dict()
        }

        #[classmethod]
        #[pyo3(signature = (markers))]
        pub fn from_child_markers(
            _cls: &Bound<'_, PyType>,
            markers: Vec<Option<PyPositionMarker>>,
        ) -> PyResult<Self> {
            let rust_markers: Vec<Option<PositionMarker>> =
                markers.into_iter().map(|m| m.map(Into::into)).collect();
            Ok(Self(PositionMarker::from_child_markers(&rust_markers)))
        }

        #[classmethod]
        pub fn from_point(
            _cls: &Bound<'_, PyType>,
            source_point: usize,
            templated_point: usize,
            templated_file: PySqlFluffTemplatedFile,
            working_line_no: Option<usize>,
            working_line_pos: Option<usize>,
        ) -> Self {
            let templated_file = templated_file.0 .0;
            Self(PositionMarker::from_point(
                source_point,
                templated_point,
                &templated_file,
                working_line_no,
                working_line_pos,
            ))
        }

        #[classmethod]
        pub fn from_points(
            _cls: &Bound<'_, PyType>,
            start_marker: &PyPositionMarker,
            end_marker: &PyPositionMarker,
        ) -> Self {
            Self(PositionMarker::from_points(&start_marker.0, &end_marker.0))
        }

        pub fn is_point(&self) -> bool {
            self.0.is_point()
        }

        pub fn to_source_string(&self) -> String {
            self.0.to_source_string()
        }
    }

    impl Display for PyPositionMarker {
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            write!(f, "{}", self.0.to_source_string())
        }
    }

    impl From<PyPositionMarker> for PySqlFluffTemplatedFile {
        fn from(value: PyPositionMarker) -> Self {
            PySqlFluffTemplatedFile(PyTemplatedFile::from(value.0.templated_file.clone()))
        }
    }

    impl From<PyPositionMarker> for PositionMarker {
        fn from(value: PyPositionMarker) -> Self {
            value.0
        }
    }

    impl From<PositionMarker> for PyPositionMarker {
        fn from(value: PositionMarker) -> Self {
            Self(value)
        }
    }

    impl PartialEq for PyPositionMarker {
        fn eq(&self, other: &Self) -> bool {
            self.0.eq(&other.0)
        }
    }

    impl PartialOrd for PyPositionMarker {
        fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
            self.0.partial_cmp(&other.0)
        }
    }

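    /// Accepts any Python object shaped like a sqlfluff `PositionMarker`
    /// (duck-typed via `getattr`), not just the Rust-backed `RsPositionMarker`.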
    #[derive(Clone, IntoPyObject, Debug)]
    pub struct PySqlFluffPositionMarker(pub PyPositionMarker);

    impl<'py> FromPyObject<'py> for PySqlFluffPositionMarker {
        fn extract_bound(obj: &pyo3::Bound<'py, pyo3::PyAny>) -> PyResult<Self> {
            let source_slice = obj.getattr("source_slice")?.extract::<Slice>()?;
            // dbg!(source_slice);
            let templated_slice = obj.getattr("templated_slice")?.extract::<Slice>()?;
            // dbg!(templated_slice);
            let templated_file: Arc<TemplatedFile> = obj
                .getattr("templated_file")?
                .extract::<PySqlFluffTemplatedFile>()?
                .into();
            // dbg!(templated_file.clone());
            // let working_line_no = obj.getattr("working_line_no")?.extract::<usize>()?;
            // let working_line_pos = obj.getattr("working_line_pos")?.extract::<usize>()?;

            Ok(Self(PyPositionMarker(PositionMarker::new(
                source_slice,
                templated_slice,
                &templated_file,
                None,
                None,
            ))))
        }
    }

    impl From<PySqlFluffPositionMarker> for PyPositionMarker {
        fn from(value: PySqlFluffPositionMarker) -> Self {
            value.0
        }
    }

    impl From<PySqlFluffPositionMarker> for PositionMarker {
        fn from(value: PySqlFluffPositionMarker) -> Self {
            value.0 .0
        }
    }
}
482
sqlfluffrs/src/matcher.rs
Normal file
@@ -0,0 +1,482 @@
use std::fmt::Display;

use fancy_regex::{Regex as FancyRegex, RegexBuilder as FancyRegexBuilder};
use hashbrown::HashSet;
use regex::{Regex, RegexBuilder};

use crate::{
    dialect::Dialect,
    marker::PositionMarker,
    regex::RegexModeGroup,
    token::Token,
};

/// Legacy function pointer type for token generation (maintains backward compatibility).
/// This signature accepts individual parameters and constructs a TokenConfig internally.
pub type TokenGenerator = fn(
    String,                           // raw
    PositionMarker,                   // pos_marker
    HashSet<String>,                  // class_types
    Vec<String>,                      // instance_types
    Option<Vec<String>>,              // trim_start
    Option<Vec<String>>,              // trim_chars
    Option<(String, RegexModeGroup)>, // quoted_value
    Option<(String, String)>,         // escape_replacement
    Option<fn(&str) -> String>,       // casefold
) -> Token;

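/// How a matcher recognises its token: a literal string, a compiled regex
/// (paired with a cheap precheck function), or an arbitrary fallback function.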
#[derive(Debug, Clone)]
pub enum LexerMode {
    String(String),                           // Match a literal string
    Regex(Regex, fn(&str) -> bool),           // Match using a plain regex
    FancyRegex(FancyRegex, fn(&str) -> bool), // Match using a backtracking regex
    Function(fn(&str, Dialect) -> Option<&str>),
}

impl Display for LexerMode {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match *self {
            LexerMode::Regex(_, _) => write!(f, "RegexMatcher"),
            LexerMode::FancyRegex(_, _) => write!(f, "FancyRegexMatcher"),
            LexerMode::String(_) => write!(f, "StringMatcher"),
            LexerMode::Function(_) => write!(f, "FunctionMatcher"),
        }
    }
}

pub struct LexedElement<'a> {
    pub raw: &'a str,
    pub matcher: &'a LexMatcher,
}

impl<'a> LexedElement<'a> {
    pub fn new(raw: &'a str, matcher: &'a LexMatcher) -> Self {
        Self { raw, matcher }
    }
}

#[derive(Debug, Clone)]
pub struct LexMatcher {
    pub dialect: Dialect,
    pub name: String,
    pub mode: LexerMode,
    pub token_class_func: TokenGenerator,
    pub subdivider: Option<Box<LexMatcher>>,
    pub trim_post_subdivide: Option<Box<LexMatcher>>,
    pub trim_start: Option<Vec<String>>,
    pub trim_chars: Option<Vec<String>>,
    pub quoted_value: Option<(String, RegexModeGroup)>,
    pub escape_replacements: Option<(String, String)>,
    pub casefold: Option<fn(&str) -> String>,
    pub kwarg_type: Option<String>,
}

impl Display for LexMatcher {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "<{}: {}>", self.mode, self.name)
    }
}

impl LexMatcher {
    pub fn string_lexer(
        dialect: Dialect,
        name: &str,
        template: &str,
        token_class_func: TokenGenerator,
        subdivider: Option<Box<LexMatcher>>,
        trim_post_subdivide: Option<Box<LexMatcher>>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacements: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
        kwarg_type: Option<String>,
    ) -> Self {
        Self {
            dialect,
            name: name.to_string(),
            mode: LexerMode::String(template.to_string()),
            token_class_func,
            subdivider,
            trim_post_subdivide,
            trim_start,
            trim_chars,
            quoted_value,
            escape_replacements,
            casefold,
            kwarg_type,
        }
    }

    fn base_regex_lexer(
        dialect: Dialect,
        name: &str,
        pattern: &str,
        token_class_func: TokenGenerator,
        subdivider: Option<Box<LexMatcher>>,
        trim_post_subdivide: Option<Box<LexMatcher>>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacements: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
        fallback_lexer: Option<fn(&str, Dialect) -> Option<&str>>,
        precheck: fn(&str) -> bool,
        kwarg_type: Option<String>,
    ) -> Self {
        // Prefer the plain regex engine; fall back to fancy_regex, and then to
        // a hand-written function if the pattern cannot be compiled at all.
        let mode = match RegexBuilder::new(pattern).build() {
            Ok(regex) => LexerMode::Regex(regex, precheck),
            Err(_) => match FancyRegexBuilder::new(pattern).build() {
                Ok(regex) => LexerMode::FancyRegex(regex, precheck),
                Err(_) => {
                    if let Some(fallback) = fallback_lexer {
                        LexerMode::Function(fallback)
                    } else {
                        panic!(
                            "Unable to compile regex {} and no fallback function provided",
                            pattern
                        )
                    }
                }
            },
        };

        Self {
            dialect,
            name: name.to_string(),
            mode,
            token_class_func,
            subdivider,
            trim_post_subdivide,
            trim_start,
            trim_chars,
            quoted_value,
            escape_replacements,
            casefold,
            kwarg_type,
        }
    }

    pub fn regex_lexer(
        dialect: Dialect,
        name: &str,
        template: &str,
        token_class_func: TokenGenerator,
        subdivider: Option<Box<LexMatcher>>,
        trim_post_subdivide: Option<Box<LexMatcher>>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacements: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
        fallback_lexer: Option<fn(&str, Dialect) -> Option<&str>>,
        precheck: fn(&str) -> bool,
        kwarg_type: Option<String>,
    ) -> Self {
        // Anchor the pattern to the start of the input for top-level matching.
        let pattern = format!(r"(?s)\A(?:{})", template);
        Self::base_regex_lexer(
            dialect,
            name,
            &pattern,
            token_class_func,
            subdivider,
            trim_post_subdivide,
            trim_start,
            trim_chars,
            quoted_value,
            escape_replacements,
            casefold,
            fallback_lexer,
            precheck,
            kwarg_type,
        )
    }

    pub fn regex_subdivider(
        dialect: Dialect,
        name: &str,
        template: &str,
        token_class_func: TokenGenerator,
        subdivider: Option<Box<LexMatcher>>,
        trim_post_subdivide: Option<Box<LexMatcher>>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacements: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
        fallback_lexer: Option<fn(&str, Dialect) -> Option<&str>>,
        precheck: fn(&str) -> bool,
        kwarg_type: Option<String>,
    ) -> Self {
        // Unanchored variant, used to search within an already-matched element.
        let pattern = format!(r"(?:{})", template);
        Self::base_regex_lexer(
            dialect,
            name,
            &pattern,
            token_class_func,
            subdivider,
            trim_post_subdivide,
            trim_start,
            trim_chars,
            quoted_value,
            escape_replacements,
            casefold,
            fallback_lexer,
            precheck,
            kwarg_type,
        )
    }

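    /// Attempt to match this matcher at the start of `input`. On success,
    /// returns the (possibly subdivided) elements plus the total matched length.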
    pub fn scan_match<'a>(&'a self, input: &'a str) -> Option<(Vec<LexedElement<'a>>, usize)> {
        // let t = Instant::now();
        if input.is_empty() {
            panic!("Unexpected empty string!");
        }

        // Match based on the mode
        let matched = match &self.mode {
            LexerMode::String(template) => input
                .starts_with(template)
                .then(|| LexedElement::new(template, self)),
            LexerMode::Regex(regex, is_match_valid) => {
                if !(is_match_valid)(input) {
                    // println!("{},{}", self.name, t.elapsed().as_nanos());
                    return None;
                }
                regex
                    .find(input)
                    .map(|mat| LexedElement::new(mat.as_str(), self))
            }
            LexerMode::FancyRegex(regex, is_match_valid) => {
                if !(is_match_valid)(input) {
                    // println!("{},{}", self.name, t.elapsed().as_nanos());
                    return None;
                }
                regex
                    .find(input)
                    .ok()
                    .flatten()
                    .map(|mat| LexedElement::new(mat.as_str(), self))
            }
            LexerMode::Function(function) => {
                (function)(input, self.dialect).map(|s| LexedElement::new(s, self))
            }
        };
        // println!("{},{}", self.name, t.elapsed().as_nanos());

        // Handle subdivision and trimming
        if let Some(matched) = matched {
            let len = matched.raw.len();
            let elements = self.subdivide(matched);
            Some((elements, len))
        } else {
            None
        }
    }

    fn search(&self, input: &str) -> Option<(usize, usize)> {
        match &self.mode {
            LexerMode::String(template) => input.find(template).map(|start| {
                let end = start + template.len();
                (start, end)
            }),
            LexerMode::Regex(regex, _) => regex.find(input).map(|mat| (mat.start(), mat.end())),
            LexerMode::FancyRegex(regex, _) => regex
                .find(input)
                .ok()
                .flatten()
                .map(|mat| (mat.start(), mat.end())),
            _ => todo!(),
        }
    }

    fn subdivide<'a>(&'a self, matched: LexedElement<'a>) -> Vec<LexedElement<'a>> {
        if let Some(subdivider) = &self.subdivider {
            let mut elements = Vec::new();
            let mut buffer = matched.raw;
            while !buffer.is_empty() {
                if let Some((start, end)) = subdivider.search(buffer) {
                    let trimmed_elems = self.trim_match(&buffer[..start]);
                    elements.extend(trimmed_elems);
                    elements.push(LexedElement {
                        raw: &buffer[start..end],
                        matcher: subdivider,
                    });
                    buffer = &buffer[end..];
                } else {
                    let trimmed_elems = self.trim_match(buffer);
                    elements.extend(trimmed_elems);
                    break;
                }
            }
            elements
        } else {
            vec![matched]
        }
    }

    fn trim_match<'a>(&'a self, raw: &'a str) -> Vec<LexedElement<'a>> {
        let mut elements = Vec::new();
        let mut buffer = raw;
        let mut content_buffer = 0..0;

        if let Some(trim_post_subdivide) = &self.trim_post_subdivide {
            while !buffer.is_empty() {
                if let Some((start, end)) = trim_post_subdivide.search(buffer) {
                    if start == 0 {
                        // Starting match
                        elements.push(LexedElement {
                            raw: &buffer[..end],
                            matcher: trim_post_subdivide,
                        });
                        buffer = &buffer[end..];
                        content_buffer = end..end;
                    } else if end == buffer.len() {
                        // Ending match: emit the accumulated content, then the trim.
                        elements.push(LexedElement {
                            raw: &raw[content_buffer.start..content_buffer.end + start],
                            matcher: self,
                        });
                        elements.push(LexedElement {
                            raw: &buffer[start..end],
                            matcher: trim_post_subdivide,
                        });
                        return elements;
                    } else {
                        // Mid-string match: keep accumulating content.
                        content_buffer.end += end;
                        buffer = &buffer[end..];
                    }
                } else {
                    break;
                }
            }
        }
        if !content_buffer.is_empty() || !buffer.is_empty() {
            elements.push(LexedElement::new(&raw[content_buffer.start..], self));
        }
        elements
    }

    pub fn construct_token(&self, raw: &str, pos_marker: PositionMarker) -> Token {
        // If an explicit kwarg type was configured, prefer it over the matcher name.
        let instance_types = match self.kwarg_type.clone() {
            Some(t) => vec![t],
            None => vec![self.name.clone()],
        };

        (self.token_class_func)(
            raw.to_string(),
            pos_marker,
            HashSet::new(),
            instance_types,
            self.trim_start.clone(),
            self.trim_chars.clone(),
            self.quoted_value.clone(),
            self.escape_replacements.clone(),
            self.casefold,
        )
    }
}

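/// Fallback matcher for nested block comments, which a single regex cannot
/// express: tracks `/* ... */` nesting depth by hand. For SQLite an
/// unterminated comment is still accepted. For example,
/// `extract_nested_block_comment("/* a /* b */ c */ SELECT", dialect)`
/// returns `Some("/* a /* b */ c */")`.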
pub fn extract_nested_block_comment(input: &str, dialect: Dialect) -> Option<&str> {
    let mut chars = input.chars().peekable();
    let mut comment = String::new();

    // Ensure the input starts with "/*"
    if chars.next() != Some('/') || chars.next() != Some('*') {
        return None;
    }

    comment.push_str("/*"); // Add the opening delimiter
    let mut depth = 1; // Track nesting level

    while let Some(c) = chars.next() {
        comment.push(c);

        if c == '/' && chars.peek() == Some(&'*') {
            chars.next(); // Consume '*'
            comment.push('*');
            depth += 1;
        } else if c == '*' && chars.peek() == Some(&'/') {
            chars.next(); // Consume '/'
            comment.push('/');
            depth -= 1;

            if depth == 0 {
                return Some(&input[..comment.len()]);
            }
        }
    }

    // If we reach here, the comment wasn't properly closed
    match &dialect {
        Dialect::Sqlite => Some(&input[..comment.len()]),
        _ => None,
    }
}

// TODO: implement python passthroughs
#[cfg(feature = "python")]
pub mod python {}

#[cfg(test)]
mod test {
    use crate::{dialect::Dialect, token::Token};

    use super::{extract_nested_block_comment, LexMatcher};

    #[test]
    fn test_subdivide() {
        let block_comment_matcher = LexMatcher::regex_lexer(
            Dialect::Ansi,
            "block_comment",
            r#"\/\*([^\*]|\*(?!\/))*\*\/"#,
            Token::comment_token_compat,
            Some(Box::new(LexMatcher::regex_subdivider(
                Dialect::Ansi,
                "newline",
                r#"\r\n|\n"#,
                Token::newline_token_compat,
                None, None, None, None, None, None, None, None,
                |_| true,
                None,
            ))),
            Some(Box::new(LexMatcher::regex_subdivider(
                Dialect::Ansi,
                "whitespace",
                r#"[^\S\r\n]+"#,
                Token::whitespace_token_compat,
                None, None, None, None, None, None, None, None,
                |_| true,
                None,
            ))),
            None, None, None, None, None,
            Some(extract_nested_block_comment),
            |input| input.starts_with("/"),
            None,
        );

        let (elems, _) = block_comment_matcher
            .scan_match("/*\n)\n*/")
            .expect("should match");
        for elem in elems {
            println!("{}: {}", elem.matcher.name, elem.raw);
        }
    }
}
25
sqlfluffrs/src/python.rs
Normal file
@@ -0,0 +1,25 @@
use crate::lexer::python::{PyLexer, PySQLLexError};
use crate::marker::python::PyPositionMarker;
use crate::templater::{
    fileslice::python::{PyRawFileSlice, PyTemplatedFileSlice},
    templatefile::python::PyTemplatedFile,
};
use crate::token::python::PyToken;
use pyo3::prelude::*;

/// A Python module implemented in Rust.
#[pymodule(name = "sqlfluffrs", module = "sqlfluffrs")]
fn sqlfluffrs(m: &Bound<'_, PyModule>) -> PyResult<()> {
    let env = env_logger::Env::default().filter_or("RUST_LOG", "warn");
    env_logger::Builder::from_env(env)
        .try_init()
        .unwrap_or_else(|_| log::warn!("env_logger already initialized!"));
    m.add_class::<PyToken>()?;
    m.add_class::<PyTemplatedFile>()?;
    m.add_class::<PyTemplatedFileSlice>()?;
    m.add_class::<PyRawFileSlice>()?;
    m.add_class::<PySQLLexError>()?;
    m.add_class::<PyLexer>()?;
    m.add_class::<PyPositionMarker>()?;
    Ok(())
}
85
sqlfluffrs/src/regex.rs
Normal file
@@ -0,0 +1,85 @@
use std::fmt::Display;

use fancy_regex::{Regex as FancyRegex, RegexBuilder as FancyRegexBuilder};
#[cfg(feature = "python")]
use pyo3::pyclass;
use regex::{Regex, RegexBuilder};

#[cfg_attr(feature = "python", pyclass)]
#[derive(Debug, Clone)]
pub enum RegexModeGroup {
    Index(usize),
    Name(String),
}

impl From<usize> for RegexModeGroup {
    fn from(idx: usize) -> Self {
        RegexModeGroup::Index(idx)
    }
}

impl From<&str> for RegexModeGroup {
    fn from(name: &str) -> Self {
        RegexModeGroup::Name(name.to_string())
    }
}

impl From<String> for RegexModeGroup {
    fn from(name: String) -> Self {
        RegexModeGroup::Name(name)
    }
}

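/// A compiled pattern that prefers the faster `regex` engine and falls back
/// to `fancy_regex` when the pattern needs look-around or backreferences.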
#[derive(Debug, Clone)]
|
||||
pub enum RegexMode {
|
||||
Regex(Regex), // Match using a regex
|
||||
FancyRegex(FancyRegex), // Match using a regex
|
||||
}
|
||||
|
||||
impl RegexMode {
|
||||
pub fn new(pattern: &str) -> Self {
|
||||
// Try to compile with the standard regex first
|
||||
if let Ok(re) = RegexBuilder::new(pattern).build() {
|
||||
RegexMode::Regex(re)
|
||||
} else if let Ok(re) = FancyRegexBuilder::new(pattern).build() {
|
||||
RegexMode::FancyRegex(re)
|
||||
} else {
|
||||
panic!("Invalid regex pattern: {}", pattern);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn capture(&self, group: impl Into<RegexModeGroup>, text: &str) -> Option<String> {
|
||||
match self {
|
||||
RegexMode::Regex(re) => {
|
||||
let caps = re.captures(text)?;
|
||||
match group.into() {
|
||||
RegexModeGroup::Index(idx) => caps.get(idx).map(|m| m.as_str().to_string()),
|
||||
RegexModeGroup::Name(name) => caps.name(&name).map(|m| m.as_str().to_string()),
|
||||
}
|
||||
}
|
||||
RegexMode::FancyRegex(re) => {
|
||||
let caps = re.captures(text).ok()??;
|
||||
match group.into() {
|
||||
RegexModeGroup::Index(idx) => caps.get(idx).map(|m| m.as_str().to_string()),
|
||||
RegexModeGroup::Name(name) => caps.name(&name).map(|m| m.as_str().to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn replace_all(&self, text: &str, replacement: &str) -> String {
|
||||
match self {
|
||||
RegexMode::Regex(re) => re.replace_all(text, replacement).to_string(),
|
||||
RegexMode::FancyRegex(re) => re.replace_all(text, replacement).to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for RegexMode {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match *self {
|
||||
RegexMode::Regex(_) => write!(f, "Regex"),
|
||||
RegexMode::FancyRegex(_) => write!(f, "FancyRegex"),
|
||||
}
|
||||
}
|
||||
}
|
||||
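A minimal sketch of the two-engine fallback above: patterns the standard `regex` crate accepts stay on the fast path, while look-around forces the `fancy_regex` variant. The patterns and the public module path are illustrative assumptions, not taken from this diff.

use sqlfluffrs::regex::RegexMode;

fn main() {
    // A plain pattern with a named group compiles with the standard engine.
    let simple = RegexMode::new(r"'(?P<body>[^']*)'");
    assert_eq!(simple.capture("body", "'abc'"), Some("abc".to_string()));

    // Look-behind is rejected by `regex`, so `new` falls back to fancy-regex.
    let fancy = RegexMode::new(r"(?<=foo)bar");
    assert_eq!(format!("{}", fancy), "FancyRegex");
    assert_eq!(fancy.replace_all("foobar", "baz"), "foobaz");
}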
67
sqlfluffrs/src/slice.rs
Normal file
@@ -0,0 +1,67 @@
use std::{fmt::Display, ops::Range};

use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Hash, Eq, Clone, Copy, Serialize, Deserialize)]
pub struct Slice {
    pub start: usize,
    pub stop: usize,
}

impl From<Range<usize>> for Slice {
    fn from(value: Range<usize>) -> Self {
        Self {
            start: value.start,
            stop: value.end,
        }
    }
}

impl Slice {
    pub fn slice_is_point(test_slice: &Range<usize>) -> bool {
        test_slice.start == test_slice.end
    }

    pub fn len(&self) -> usize {
        self.stop - self.start
    }

    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}

impl Display for Slice {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "slice({}, {}, None)", self.start, self.stop)
    }
}

#[cfg(feature = "python")]
pub mod python {
    use super::Slice;
    use pyo3::{prelude::*, types::PySlice};

    impl<'py> FromPyObject<'py> for Slice {
        fn extract_bound(obj: &pyo3::Bound<'py, pyo3::PyAny>) -> PyResult<Self> {
            let start = obj.getattr("start")?.extract::<usize>()?;
            let stop = obj.getattr("stop")?.extract::<usize>()?;
            Ok(Slice { start, stop })
        }
    }

    impl<'py> IntoPyObject<'py> for Slice {
        type Target = PySlice; // the Python type
        type Output = Bound<'py, Self::Target>; // in most cases this will be `Bound`
        type Error = PyErr; // the conversion error type, has to be convertible to `PyErr`

        fn into_pyobject(self, py: Python<'py>) -> Result<Self::Output, Self::Error> {
            Ok(PySlice::new(
                py,
                self.start.try_into()?,
                self.stop.try_into()?,
                1,
            ))
        }
    }
}
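A short usage sketch for `Slice`, assuming the module is public: it mirrors Python's `slice(start, stop)` semantics, including the `Display` form, which makes cross-checking against the Python implementation straightforward.

use sqlfluffrs::slice::Slice;

fn main() {
    // Built from a standard Rust range.
    let s = Slice::from(3..8);
    assert_eq!(s.len(), 5);
    assert!(!s.is_empty());
    // The Display impl matches Python's slice repr.
    assert_eq!(s.to_string(), "slice(3, 8, None)");
}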
323
sqlfluffrs/src/templater/fileslice.rs
Normal file
@@ -0,0 +1,323 @@
use serde::{Deserialize, Serialize};

use crate::slice::Slice;

#[derive(Debug, PartialEq, Clone, Hash, Serialize, Deserialize)]
pub struct RawFileSlice {
    pub raw: String, // Source string
    pub slice_type: String,
    pub source_idx: usize, // Offset from beginning of source string
    // Block index, incremented on start or end block tags, e.g. "if", "for".
    // This is used in `BaseRule.discard_unsafe_fixes()` to reject any fixes
    // which span multiple templated blocks.
    pub block_idx: usize,
    // The command of a templated tag, e.g. "if", "for"
    // This is used in template tracing as a kind of cache to identify the kind
    // of template element this is without having to re-extract it each time.
    pub tag: Option<String>,
}

impl RawFileSlice {
    pub fn new(
        raw: String,
        slice_type: String,
        source_idx: usize,
        block_idx: Option<usize>,
        tag: Option<String>,
    ) -> Self {
        RawFileSlice {
            raw,
            slice_type,
            source_idx,
            block_idx: block_idx.unwrap_or_default(),
            tag,
        }
    }

    pub fn end_source_idx(&self) -> usize {
        // Return the closing index of this slice.
        let len: usize = self.raw.chars().count();
        self.source_idx + len
    }

    pub fn source_slice(&self) -> Slice {
        Slice::from(self.source_idx..self.end_source_idx())
    }

    pub fn is_source_only_slice(&self) -> bool {
        // Based on its slice_type, does it only appear in the *source*?
        // There are some slice types which are automatically source only.
        // There are *also* some which are source only because they render
        // to an empty string.
        // TODO: should any new logic go here?
        matches!(
            self.slice_type.as_str(),
            "comment" | "block_end" | "block_start" | "block_mid"
        )
    }
}

#[derive(Debug, PartialEq, Clone, Hash, Serialize, Deserialize)]
pub struct TemplatedFileSlice {
    pub slice_type: String,
    pub source_codepoint_slice: Slice,
    pub templated_codepoint_slice: Slice,
}

impl TemplatedFileSlice {
    pub fn new(
        slice_type: String,
        source_codepoint_slice: Slice,
        templated_codepoint_slice: Slice,
    ) -> Self {
        TemplatedFileSlice {
            slice_type,
            source_codepoint_slice,
            templated_codepoint_slice,
        }
    }
}

#[cfg(feature = "python")]
pub mod python {
    use bincode;
    use pyo3::{prelude::*, types::PyBytes};
    use serde::{Deserialize, Serialize};

    use crate::slice::Slice;

    use super::{RawFileSlice, TemplatedFileSlice};

    #[pyclass(name = "RsRawFileSlice", module = "sqlfluffrs")]
    #[repr(transparent)]
    #[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
    pub struct PyRawFileSlice(pub(crate) RawFileSlice);

    #[pymethods]
    impl PyRawFileSlice {
        #[new]
        #[pyo3(signature = (raw, slice_type, source_idx, block_idx=0, tag=None))]
        pub fn new(
            raw: String,
            slice_type: String,
            source_idx: usize,
            block_idx: Option<usize>,
            tag: Option<String>,
        ) -> Self {
            Self(RawFileSlice::new(
                raw, slice_type, source_idx, block_idx, tag,
            ))
        }

        pub fn __setstate__(&mut self, state: Bound<'_, PyBytes>) -> PyResult<()> {
            *self = bincode::deserialize(state.as_bytes()).map_err(|e| {
                PyErr::new::<pyo3::exceptions::PyException, _>(format!(
                    "Deserialization error: {}",
                    e
                ))
            })?;
            Ok(())
        }

        pub fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
            let bytes = bincode::serialize(&self.0).map_err(|e| {
                PyErr::new::<pyo3::exceptions::PyException, _>(format!(
                    "Serialization error: {}",
                    e
                ))
            })?;
            Ok(PyBytes::new(py, &bytes))
        }

        pub fn __getnewargs__(&self) -> PyResult<(String, String, usize, usize, Option<String>)> {
            Ok((
                self.raw(),
                self.slice_type(),
                self.source_idx(),
                self.block_idx(),
                self.tag(),
            ))
        }

        #[getter]
        pub fn raw(&self) -> String {
            self.0.raw.clone()
        }
        #[getter]
        pub fn slice_type(&self) -> String {
            self.0.slice_type.clone()
        }
        #[getter]
        pub fn source_idx(&self) -> usize {
            self.0.source_idx
        }
        #[getter]
        pub fn block_idx(&self) -> usize {
            self.0.block_idx
        }
        #[getter]
        pub fn tag(&self) -> Option<String> {
            self.0.tag.clone()
        }
    }

    impl From<PyRawFileSlice> for RawFileSlice {
        fn from(value: PyRawFileSlice) -> Self {
            value.0
        }
    }

    impl From<RawFileSlice> for PyRawFileSlice {
        fn from(value: RawFileSlice) -> Self {
            Self(value)
        }
    }

    #[pyclass(name = "RsTemplatedFileSlice", module = "sqlfluffrs")]
    #[repr(transparent)]
    #[derive(Clone, Debug, PartialEq, Hash, Serialize, Deserialize)]
    pub struct PyTemplatedFileSlice(pub(crate) TemplatedFileSlice);

    #[pymethods]
    impl PyTemplatedFileSlice {
        #[new]
        fn new(
            slice_type: String,
            source_codepoint_slice: Slice,
            templated_codepoint_slice: Slice,
        ) -> Self {
            Self(TemplatedFileSlice::new(
                slice_type,
                source_codepoint_slice,
                templated_codepoint_slice,
            ))
        }

        pub fn __setstate__(&mut self, state: Bound<'_, PyBytes>) -> PyResult<()> {
            *self = bincode::deserialize(state.as_bytes()).map_err(|e| {
                PyErr::new::<pyo3::exceptions::PyException, _>(format!(
                    "Deserialization error: {}",
                    e
                ))
            })?;
            Ok(())
        }

        pub fn __getstate__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyBytes>> {
            let bytes = bincode::serialize(&self.0).map_err(|e| {
                PyErr::new::<pyo3::exceptions::PyException, _>(format!(
                    "Serialization error: {}",
                    e
                ))
            })?;
            Ok(PyBytes::new(py, &bytes))
        }

        pub fn __getnewargs__(&self) -> PyResult<(String, Slice, Slice)> {
            Ok((
                self.0.slice_type.clone(),
                self.0.source_codepoint_slice,
                self.0.templated_codepoint_slice,
            ))
        }

        #[getter]
        fn slice_type(&self) -> PyResult<String> {
            Ok(self.0.slice_type.clone())
        }

        #[getter]
        fn source_slice(&self) -> PyResult<Slice> {
            Ok(self.0.source_codepoint_slice)
        }

        #[getter]
        fn templated_slice(&self) -> PyResult<Slice> {
            Ok(self.0.templated_codepoint_slice)
        }
    }

    impl From<PyTemplatedFileSlice> for TemplatedFileSlice {
        fn from(value: PyTemplatedFileSlice) -> Self {
            value.0
        }
    }

    impl From<TemplatedFileSlice> for PyTemplatedFileSlice {
        fn from(value: TemplatedFileSlice) -> Self {
            Self(value)
        }
    }

    pub mod sqlfluff {
        use pyo3::prelude::*;

        use crate::{
            slice::Slice,
            templater::fileslice::{RawFileSlice, TemplatedFileSlice},
        };

        use super::{PyRawFileSlice, PyTemplatedFileSlice};

        #[derive(Clone, IntoPyObject)]
        pub struct PySqlFluffTemplatedFileSlice(pub PyTemplatedFileSlice);

        impl<'py> FromPyObject<'py> for PySqlFluffTemplatedFileSlice {
            fn extract_bound(obj: &pyo3::Bound<'py, pyo3::PyAny>) -> PyResult<Self> {
                let slice_type = obj.getattr("slice_type")?.extract::<String>()?;
                let source_slice = obj.getattr("source_slice")?.extract::<Slice>()?;
                let templated_slice = obj.getattr("templated_slice")?.extract::<Slice>()?;

                Ok(Self(PyTemplatedFileSlice(TemplatedFileSlice::new(
                    slice_type,
                    source_slice,
                    templated_slice,
                ))))
            }
        }

        impl From<PySqlFluffTemplatedFileSlice> for PyTemplatedFileSlice {
            fn from(value: PySqlFluffTemplatedFileSlice) -> Self {
                value.0
            }
        }

        impl From<PySqlFluffTemplatedFileSlice> for TemplatedFileSlice {
            fn from(value: PySqlFluffTemplatedFileSlice) -> Self {
                value.0 .0
            }
        }

        #[derive(Clone)]
        pub struct PySqlFluffRawFileSlice(pub PyRawFileSlice);

        impl<'py> FromPyObject<'py> for PySqlFluffRawFileSlice {
            fn extract_bound(obj: &pyo3::Bound<'py, pyo3::PyAny>) -> PyResult<Self> {
                let raw = obj.getattr("raw")?.extract::<String>()?;
                let slice_type = obj.getattr("slice_type")?.extract::<String>()?;
                let source_idx = obj.getattr("source_idx")?.extract::<usize>().ok();
                let block_idx = obj.getattr("block_idx")?.extract::<usize>().ok();
                let tag = obj.getattr("tag")?.extract::<Option<String>>()?;

                Ok(Self(PyRawFileSlice(RawFileSlice::new(
                    raw.clone(),
                    slice_type,
                    source_idx.unwrap_or(raw.len()),
                    block_idx,
                    tag,
                ))))
            }
        }

        impl From<PySqlFluffRawFileSlice> for PyRawFileSlice {
            fn from(value: PySqlFluffRawFileSlice) -> Self {
                value.0
            }
        }

        impl From<PySqlFluffRawFileSlice> for RawFileSlice {
            fn from(value: PySqlFluffRawFileSlice) -> Self {
                value.0 .0
            }
        }
    }
}
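A sketch of the codepoint-based indexing in `RawFileSlice` (values chosen for illustration): `end_source_idx` counts chars rather than bytes, matching Python string offsets.

use sqlfluffrs::templater::fileslice::RawFileSlice;

fn main() {
    // "SELECT é" is 8 codepoints but 9 UTF-8 bytes; the index advances by 8.
    let s = RawFileSlice::new("SELECT é".to_string(), "literal".to_string(), 10, None, None);
    assert_eq!(s.end_source_idx(), 18);
    assert_eq!(s.source_slice().len(), 8);
    // "literal" renders to output, so it is not a source-only slice type.
    assert!(!s.is_source_only_slice());
}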
2
sqlfluffrs/src/templater/mod.rs
Normal file
@@ -0,0 +1,2 @@
pub mod fileslice;
pub mod templatefile;
1324
sqlfluffrs/src/templater/templatefile.rs
Normal file
File diff suppressed because it is too large
296
sqlfluffrs/src/token/compat.rs
Normal file
@@ -0,0 +1,296 @@
// Wrapper functions that maintain the old TokenGenerator signature for backward compatibility
// These are used by the generated dialect matcher code

use super::{config::TokenConfig, Token};
use crate::{marker::PositionMarker, regex::RegexModeGroup};
use hashbrown::HashSet;

impl Token {
    // Wrapper functions that convert from the old 9-parameter signature to TokenConfig

    pub fn whitespace_token_compat(
        raw: String,
        pos_marker: PositionMarker,
        class_types: HashSet<String>,
        instance_types: Vec<String>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacement: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
    ) -> Self {
        Self::whitespace_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                instance_types,
                trim_start,
                trim_chars,
                quoted_value,
                escape_replacement,
                casefold,
            },
        )
    }

    pub fn newline_token_compat(
        raw: String,
        pos_marker: PositionMarker,
        class_types: HashSet<String>,
        instance_types: Vec<String>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacement: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
    ) -> Self {
        Self::newline_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                instance_types,
                trim_start,
                trim_chars,
                quoted_value,
                escape_replacement,
                casefold,
            },
        )
    }

    pub fn comment_token_compat(
        raw: String,
        pos_marker: PositionMarker,
        class_types: HashSet<String>,
        instance_types: Vec<String>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacement: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
    ) -> Self {
        Self::comment_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                instance_types,
                trim_start,
                trim_chars,
                quoted_value,
                escape_replacement,
                casefold,
            },
        )
    }

    pub fn code_token_compat(
        raw: String,
        pos_marker: PositionMarker,
        class_types: HashSet<String>,
        instance_types: Vec<String>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacement: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
    ) -> Self {
        Self::code_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                instance_types,
                trim_start,
                trim_chars,
                quoted_value,
                escape_replacement,
                casefold,
            },
        )
    }

    pub fn symbol_token_compat(
        raw: String,
        pos_marker: PositionMarker,
        class_types: HashSet<String>,
        instance_types: Vec<String>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacement: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
    ) -> Self {
        Self::symbol_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                instance_types,
                trim_start,
                trim_chars,
                quoted_value,
                escape_replacement,
                casefold,
            },
        )
    }

    pub fn identifier_token_compat(
        raw: String,
        pos_marker: PositionMarker,
        class_types: HashSet<String>,
        instance_types: Vec<String>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacement: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
    ) -> Self {
        Self::identifier_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                instance_types,
                trim_start,
                trim_chars,
                quoted_value,
                escape_replacement,
                casefold,
            },
        )
    }

    pub fn literal_token_compat(
        raw: String,
        pos_marker: PositionMarker,
        class_types: HashSet<String>,
        instance_types: Vec<String>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacement: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
    ) -> Self {
        Self::literal_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                instance_types,
                trim_start,
                trim_chars,
                quoted_value,
                escape_replacement,
                casefold,
            },
        )
    }

    pub fn binary_operator_token_compat(
        raw: String,
        pos_marker: PositionMarker,
        class_types: HashSet<String>,
        instance_types: Vec<String>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacement: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
    ) -> Self {
        Self::binary_operator_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                instance_types,
                trim_start,
                trim_chars,
                quoted_value,
                escape_replacement,
                casefold,
            },
        )
    }

    pub fn comparison_operator_token_compat(
        raw: String,
        pos_marker: PositionMarker,
        class_types: HashSet<String>,
        instance_types: Vec<String>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacement: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
    ) -> Self {
        Self::comparison_operator_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                instance_types,
                trim_start,
                trim_chars,
                quoted_value,
                escape_replacement,
                casefold,
            },
        )
    }

    pub fn word_token_compat(
        raw: String,
        pos_marker: PositionMarker,
        class_types: HashSet<String>,
        instance_types: Vec<String>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacement: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
    ) -> Self {
        Self::word_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                instance_types,
                trim_start,
                trim_chars,
                quoted_value,
                escape_replacement,
                casefold,
            },
        )
    }

    pub fn unlexable_token_compat(
        raw: String,
        pos_marker: PositionMarker,
        class_types: HashSet<String>,
        instance_types: Vec<String>,
        trim_start: Option<Vec<String>>,
        trim_chars: Option<Vec<String>>,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacement: Option<(String, String)>,
        casefold: Option<fn(&str) -> String>,
    ) -> Self {
        Self::unlexable_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                instance_types,
                trim_start,
                trim_chars,
                quoted_value,
                escape_replacement,
                casefold,
            },
        )
    }
}
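The wrappers above all share one nine-parameter shape so they can coerce to a single plain `fn` pointer type in the generated matcher tables. The alias below is an assumption mirroring `crate::matcher::TokenGenerator` (whose definition is not shown in this diff), sketching why the uniform signature matters:

use hashbrown::HashSet;
use sqlfluffrs::{marker::PositionMarker, regex::RegexModeGroup, token::Token};

// Hypothetical alias standing in for crate::matcher::TokenGenerator.
type TokenGenerator = fn(
    String,
    PositionMarker,
    HashSet<String>,
    Vec<String>,
    Option<Vec<String>>,
    Option<Vec<String>>,
    Option<(String, RegexModeGroup)>,
    Option<(String, String)>,
    Option<fn(&str) -> String>,
) -> Token;

fn main() {
    // Every *_compat wrapper coerces to the same fn type, so dialect
    // tables can store them interchangeably.
    let generators: [TokenGenerator; 2] =
        [Token::whitespace_token_compat, Token::comment_token_compat];
    assert_eq!(generators.len(), 2);
}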
93
sqlfluffrs/src/token/config.rs
Normal file
@@ -0,0 +1,93 @@
use crate::regex::RegexModeGroup;
use hashbrown::HashSet;

/// Configuration for token construction, grouping optional parameters
#[derive(Debug, Clone, Default)]
pub struct TokenConfig {
    pub class_types: HashSet<String>,
    pub instance_types: Vec<String>,
    pub trim_start: Option<Vec<String>>,
    pub trim_chars: Option<Vec<String>>,
    pub quoted_value: Option<(String, RegexModeGroup)>,
    pub escape_replacement: Option<(String, String)>,
    pub casefold: Option<fn(&str) -> String>,
}

impl TokenConfig {
    /// Create a new TokenConfig with default values (all empty/None)
    pub fn new() -> Self {
        Self::default()
    }

    /// Create TokenConfig with only instance_types set
    pub fn with_instance_types(instance_types: Vec<String>) -> Self {
        Self {
            instance_types,
            ..Default::default()
        }
    }

    /// Create TokenConfig with class_types and instance_types
    pub fn with_types(class_types: HashSet<String>, instance_types: Vec<String>) -> Self {
        Self {
            class_types,
            instance_types,
            ..Default::default()
        }
    }

    /// Builder method to add trim_start
    pub fn trim_start(mut self, chars: Vec<String>) -> Self {
        self.trim_start = Some(chars);
        self
    }

    /// Builder method to add trim_chars
    pub fn trim_chars(mut self, chars: Vec<String>) -> Self {
        self.trim_chars = Some(chars);
        self
    }

    /// Builder method to add quoted_value
    pub fn quoted_value(mut self, value: String, mode: RegexModeGroup) -> Self {
        self.quoted_value = Some((value, mode));
        self
    }

    /// Builder method to add escape_replacement
    pub fn escape_replacement(mut self, pattern: String, replacement: String) -> Self {
        self.escape_replacement = Some((pattern, replacement));
        self
    }

    /// Builder method to add casefold function
    pub fn casefold(mut self, func: fn(&str) -> String) -> Self {
        self.casefold = Some(func);
        self
    }
}

/// Helper to extract individual fields for backward compatibility
impl TokenConfig {
    pub fn into_parts(
        self,
    ) -> (
        HashSet<String>,
        Vec<String>,
        Option<Vec<String>>,
        Option<Vec<String>>,
        Option<(String, RegexModeGroup)>,
        Option<(String, String)>,
        Option<fn(&str) -> String>,
    ) {
        (
            self.class_types,
            self.instance_types,
            self.trim_start,
            self.trim_chars,
            self.quoted_value,
            self.escape_replacement,
            self.casefold,
        )
    }
}
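A usage sketch for the builder, assuming public visibility; unset options simply stay `None` via the `Default` derive:

use hashbrown::HashSet;
use sqlfluffrs::token::config::TokenConfig;

fn main() {
    // Builder-style construction replaces the old nine-positional-argument
    // signature used by the *_compat wrappers.
    let config = TokenConfig::with_types(
        HashSet::from_iter(["quoted_literal".to_string()]),
        vec!["single_quote".to_string()],
    )
    .trim_chars(vec!["'".to_string()])
    .escape_replacement("''".to_string(), "'".to_string());

    assert!(config.quoted_value.is_none());
    assert_eq!(config.trim_chars, Some(vec!["'".to_string()]));
}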
442
sqlfluffrs/src/token/construction.rs
Normal file
@@ -0,0 +1,442 @@
use super::{config::TokenConfig, Token};
use crate::{marker::PositionMarker, slice::Slice, templater::templatefile::TemplatedFile};

use std::sync::Arc;

use hashbrown::HashSet;
use uuid::Uuid;

impl Token {
    pub fn base_token(
        raw: String,
        pos_marker: PositionMarker,
        config: TokenConfig,
        segments: Vec<Token>,
    ) -> Self {
        let TokenConfig {
            class_types,
            instance_types,
            trim_start,
            trim_chars,
            quoted_value,
            escape_replacement,
            casefold,
        } = config;

        let (token_types, class_types) = iter_base_types("base", class_types.clone());
        let raw_value = Token::normalize(&raw, quoted_value.clone(), escape_replacement.clone());
        Self {
            token_type: token_types,
            instance_types,
            class_types,
            comment_separate: false,
            is_meta: false,
            allow_empty: false,
            pos_marker: Some(pos_marker),
            raw,
            is_whitespace: false,
            is_code: true,
            is_comment: false,
            _default_raw: "".to_string(),
            indent_value: 0,
            is_templated: false,
            block_uuid: None,
            source_str: None,
            block_type: None,
            parent: None,
            parent_idx: None,
            segments,
            preface_modifier: "".to_string(),
            suffix: "".to_string(),
            uuid: Uuid::new_v4().as_u128(),
            source_fixes: None,
            trim_start,
            trim_chars,
            quoted_value,
            escape_replacement,
            casefold,
            raw_value,
        }
    }

    pub fn raw_token(raw: String, pos_marker: PositionMarker, config: TokenConfig) -> Self {
        let (token_type, class_types) = iter_base_types("raw", config.class_types.clone());
        let suffix = format!("'{}'", raw.escape_debug().to_string().trim_matches('"'));

        let mut token = Token::base_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                ..config
            },
            vec![],
        );
        token.suffix = suffix;
        token.token_type = token_type;
        token
    }

    pub fn code_token(raw: String, pos_marker: PositionMarker, config: TokenConfig) -> Self {
        Self::raw_token(raw, pos_marker, config)
    }

    pub fn symbol_token(raw: String, pos_marker: PositionMarker, config: TokenConfig) -> Self {
        let (token_type, class_types) = iter_base_types("symbol", config.class_types.clone());
        let mut token = Self::code_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                ..config
            },
        );
        token.token_type = token_type;
        token
    }

    pub fn identifier_token(raw: String, pos_marker: PositionMarker, config: TokenConfig) -> Self {
        let (token_type, class_types) = iter_base_types("identifier", config.class_types.clone());
        let mut token = Self::code_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                ..config
            },
        );
        token.token_type = token_type;
        token
    }

    pub fn literal_token(raw: String, pos_marker: PositionMarker, config: TokenConfig) -> Self {
        let (token_type, class_types) = iter_base_types("literal", config.class_types.clone());
        let mut token = Self::code_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                ..config
            },
        );
        token.token_type = token_type;
        token
    }

    pub fn binary_operator_token(raw: String, pos_marker: PositionMarker, config: TokenConfig) -> Self {
        let (token_type, class_types) =
            iter_base_types("binary_operator", config.class_types.clone());
        let mut token = Self::code_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                ..config
            },
        );
        token.token_type = token_type;
        token
    }

    pub fn comparison_operator_token(raw: String, pos_marker: PositionMarker, config: TokenConfig) -> Self {
        let (token_type, class_types) =
            iter_base_types("comparison_operator", config.class_types.clone());
        let mut token = Self::code_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                ..config
            },
        );
        token.token_type = token_type;
        token
    }

    pub fn word_token(raw: String, pos_marker: PositionMarker, config: TokenConfig) -> Self {
        let (token_type, class_types) = iter_base_types("word", config.class_types.clone());
        let mut token = Self::code_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                ..config
            },
        );
        token.token_type = token_type;
        token
    }

    pub fn unlexable_token(raw: String, pos_marker: PositionMarker, config: TokenConfig) -> Self {
        let (token_type, class_types) = iter_base_types("unlexable", config.class_types.clone());
        let mut token = Self::code_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                ..config
            },
        );
        token.token_type = token_type;
        token
    }

    pub fn whitespace_token(raw: String, pos_marker: PositionMarker, config: TokenConfig) -> Self {
        let (token_type, class_types) = iter_base_types("whitespace", config.class_types.clone());
        let mut token = Self::raw_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                ..config
            },
        );
        token.token_type = token_type;
        token.is_whitespace = true;
        token.is_code = false;
        token.is_comment = false;
        token._default_raw = " ".to_string();
        token
    }

    pub fn newline_token(raw: String, pos_marker: PositionMarker, config: TokenConfig) -> Self {
        let (token_type, class_types) = iter_base_types("newline", config.class_types.clone());
        let mut token = Self::raw_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                ..config
            },
        );
        token.token_type = token_type;
        token.is_whitespace = true;
        token.is_code = false;
        token.is_comment = false;
        token._default_raw = "\n".to_string();
        token
    }

    pub fn comment_token(raw: String, pos_marker: PositionMarker, config: TokenConfig) -> Self {
        let (token_type, class_types) = iter_base_types("comment", config.class_types.clone());
        let mut token = Self::raw_token(
            raw,
            pos_marker,
            TokenConfig {
                class_types,
                ..config
            },
        );
        token.token_type = token_type;
        token.is_code = false;
        token.is_comment = true;
        token
    }

    pub fn meta_token(
        pos_marker: PositionMarker,
        is_templated: bool,
        block_uuid: Option<Uuid>,
        class_types: HashSet<String>,
    ) -> Self {
        let (token_type, class_types) = iter_base_types("meta", class_types.clone());
        let mut token = Self::raw_token(
            "".to_string(),
            pos_marker,
            TokenConfig {
                class_types,
                instance_types: vec![],
                ..TokenConfig::default()
            },
        );
        token.token_type = token_type;
        token.is_code = false;
        token.is_meta = true;
        token.is_templated = is_templated;
        token.block_uuid = block_uuid;
        token.preface_modifier = "[META] ".to_string();
        token.suffix = String::new();
        token
    }

    pub fn end_of_file_token(
        pos_marker: PositionMarker,
        is_templated: bool,
        block_uuid: Option<Uuid>,
        class_types: HashSet<String>,
    ) -> Self {
        let (token_type, class_types) = iter_base_types("end_of_file", class_types);
        Self {
            token_type,
            ..Self::meta_token(pos_marker, is_templated, block_uuid, class_types)
        }
    }

    pub fn indent_token(
        pos_marker: PositionMarker,
        is_templated: bool,
        block_uuid: Option<Uuid>,
        class_types: HashSet<String>,
    ) -> Self {
        let (token_type, class_types) = iter_base_types("indent", class_types);
        Self {
            token_type,
            indent_value: 1,
            suffix: block_uuid
                .map(|u| u.as_hyphenated().to_string())
                .unwrap_or_default(),
            ..Self::meta_token(pos_marker, is_templated, block_uuid, class_types)
        }
    }

    pub fn dedent_token(
        pos_marker: PositionMarker,
        is_templated: bool,
        block_uuid: Option<Uuid>,
        class_types: HashSet<String>,
    ) -> Self {
        let (token_type, class_types) = iter_base_types("dedent", class_types);
        Self {
            token_type,
            indent_value: -1,
            ..Self::indent_token(pos_marker, is_templated, block_uuid, class_types)
        }
    }

    pub fn template_loop_token(
        pos_marker: PositionMarker,
        block_uuid: Option<Uuid>,
        class_types: HashSet<String>,
    ) -> Self {
        let (token_type, class_types) = iter_base_types("template_loop", class_types);
        Self {
            token_type,
            ..Self::meta_token(pos_marker, false, block_uuid, class_types)
        }
    }

    pub fn template_placeholder_token(
        pos_marker: PositionMarker,
        source_string: String,
        block_type: String,
        block_uuid: Option<Uuid>,
        class_types: HashSet<String>,
    ) -> Self {
        let (token_type, class_types) = iter_base_types("placeholder", class_types);
        Self {
            token_type,
            block_type: Some(block_type),
            source_str: Some(source_string),
            ..Self::meta_token(pos_marker, false, block_uuid, class_types)
        }
    }

    pub fn template_placeholder_token_from_slice(
        source_slice: Slice,
        templated_slice: Slice,
        block_type: String,
        templated_file: &Arc<TemplatedFile>,
        block_uuid: Option<Uuid>,
        class_types: HashSet<String>,
    ) -> Self {
        let pos_marker = PositionMarker::new(
            source_slice,
            templated_slice,
            templated_file,
            None,
            None,
        );
        Self {
            ..Self::template_placeholder_token(
                pos_marker,
                templated_file
                    .source_str
                    .chars()
                    .skip(source_slice.start)
                    .take(source_slice.len())
                    .collect::<String>(),
                block_type,
                block_uuid,
                class_types,
            )
        }
    }
}

fn iter_base_types(token_type: &str, class_types: HashSet<String>) -> (String, HashSet<String>) {
    let mut class_types = class_types;
    let token_type = token_type.to_string();
    class_types.insert(token_type.clone());
    (token_type, class_types)
}
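To make the delegation chain concrete, here is a self-contained sketch (duplicating the private `iter_base_types` helper above) of how each constructor layers its own type on top of the inherited ones, so a token built via `symbol_token` ends up carrying "base", "raw", and "symbol":

use hashbrown::HashSet;

fn iter_base_types(token_type: &str, class_types: HashSet<String>) -> (String, HashSet<String>) {
    let mut class_types = class_types;
    let token_type = token_type.to_string();
    class_types.insert(token_type.clone());
    (token_type, class_types)
}

fn main() {
    // Simulate base_token -> raw_token -> code_token -> symbol_token.
    let (_, c) = iter_base_types("base", HashSet::new());
    let (_, c) = iter_base_types("raw", c);
    let (t, c) = iter_base_types("symbol", c);
    assert_eq!(t, "symbol");
    assert!(c.contains("base") && c.contains("raw") && c.contains("symbol"));
}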
24
sqlfluffrs/src/token/eq.rs
Normal file
@@ -0,0 +1,24 @@
use std::hash::Hash;

use super::Token;

impl PartialEq for Token {
    fn eq(&self, other: &Self) -> bool {
        self.uuid == other.uuid
            || (self.token_type == other.token_type
                && self.raw == other.raw
                && self.pos_marker.is_some()
                && other.pos_marker.is_some()
                && self.pos_marker == other.pos_marker)
    }
}

impl Hash for Token {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.token_type.hash(state);
        self.raw.hash(state);
        if let Some(p) = self.pos_marker.as_ref() {
            p.working_loc().hash(state);
        }
    }
}
21
sqlfluffrs/src/token/fix.rs
Normal file
@@ -0,0 +1,21 @@
use crate::slice::Slice;

#[derive(Debug, Clone)]
pub struct SourceFix {
    edit: String,
    source_slice: Slice,
    templated_slice: Slice,
}

impl PartialEq for SourceFix {
    fn eq(&self, other: &Self) -> bool {
        self.edit == other.edit && self.source_slice == other.source_slice
    }
}

impl std::hash::Hash for SourceFix {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.edit.hash(state);
        self.source_slice.hash(state);
    }
}
14
sqlfluffrs/src/token/fmt.rs
Normal file
@@ -0,0 +1,14 @@
use super::Token;
use std::fmt::Display;

impl Display for Token {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "<{}: ({}) '{}'>",
            self.token_type.clone(),
            self.pos_marker.clone().expect("PositionMarker unset"),
            self.raw.escape_debug(),
        )
    }
}
830
sqlfluffrs/src/token/mod.rs
Normal file
@@ -0,0 +1,830 @@
pub mod compat;
pub mod config;
pub mod construction;
mod eq;
pub mod fix;
mod fmt;
pub mod path;
#[cfg(feature = "python")]
pub mod python;

use std::{
    fmt::Write,
    sync::{Arc, Weak},
};

use fix::SourceFix;
use hashbrown::HashSet;
use path::PathStep;
use uuid::Uuid;

use crate::{
    marker::PositionMarker,
    regex::{RegexMode, RegexModeGroup},
};

#[derive(Debug, Clone, PartialEq)]
pub enum TupleSerialisedSegment {
    Str(String, String),
    Nested(String, Vec<TupleSerialisedSegment>),
}

#[derive(Debug, Clone)]
pub struct Token {
    pub token_type: String,
    pub instance_types: Vec<String>,
    pub class_types: HashSet<String>,
    pub comment_separate: bool,
    pub is_meta: bool,
    pub allow_empty: bool,
    pub pos_marker: Option<PositionMarker>,
    pub raw: String,
    is_whitespace: bool,
    is_code: bool,
    is_comment: bool,
    _default_raw: String,
    pub indent_value: i32,
    pub is_templated: bool,
    pub block_uuid: Option<Uuid>,
    pub source_str: Option<String>,
    pub block_type: Option<String>,
    parent: Option<Weak<Token>>,
    parent_idx: Option<usize>,
    pub segments: Vec<Token>,
    preface_modifier: String,
    suffix: String,
    pub uuid: u128,
    pub source_fixes: Option<Vec<SourceFix>>,
    pub trim_start: Option<Vec<String>>,
    pub trim_chars: Option<Vec<String>>,
    quoted_value: Option<(String, RegexModeGroup)>,
    escape_replacement: Option<(String, String)>,
    casefold: Option<fn(&str) -> String>,
    raw_value: String,
}

impl Token {
    fn comments(&self) -> Vec<Token> {
        self.segments
            .clone()
            .into_iter()
            .filter(|s| s.is_type(&["comment"]))
            .collect::<Vec<_>>()
    }

    fn non_comments(&self) -> Vec<Token> {
        self.segments
            .clone()
            .into_iter()
            .filter(|s| !s.is_type(&["comment"]))
            .collect::<Vec<_>>()
    }

    /// Returns True if this segment is code.
    pub fn is_code(&self) -> bool {
        match self.is_raw() {
            true => self.is_code,
            false => self.segments.iter().any(|s| s.is_code()),
        }
    }

    fn code_indices(&self) -> Vec<usize> {
        self.segments
            .iter()
            .enumerate()
            .filter(|(_i, s)| s.is_code())
            .map(|(i, _s)| i)
            .collect()
    }

    pub fn is_comment(&self) -> bool {
        match self.is_raw() {
            true => self.is_comment,
            false => self.segments.iter().all(|s| s.is_comment()),
        }
    }

    pub fn is_whitespace(&self) -> bool {
        match self.is_raw() {
            true => self.is_whitespace,
            false => self.segments.iter().all(|s| s.is_whitespace()),
        }
    }

    pub fn raw(&self) -> String {
        self.raw.clone()
    }

    pub fn raw_upper(&self) -> String {
        self.raw.to_uppercase()
    }

    pub fn normalize(
        value: &str,
        quoted_value: Option<(String, RegexModeGroup)>,
        escape_replacement: Option<(String, String)>,
    ) -> String {
        let mut str_buffer = value.to_string();

        if let Some((ref regex_str, idx)) = quoted_value {
            if let Some(captured) = RegexMode::new(regex_str).capture(idx, value) {
                str_buffer = captured
            }
        }

        if let Some((ref regex_str, ref replacement)) = escape_replacement {
            str_buffer = RegexMode::new(regex_str).replace_all(&str_buffer, replacement.as_str());
        }

        str_buffer
    }

    pub fn raw_segments(&self) -> Vec<Token> {
        match self.is_raw() {
            true => vec![self.clone()],
            false => self
                .segments
                .iter()
                .flat_map(|s| s.raw_segments())
                .collect::<Vec<_>>(),
        }
    }

    /// The set of full types for this token, including inherited.
    /// Adds the surrogate type for raw segments.
    pub fn class_types(&self) -> HashSet<String> {
        let mut full_types = self.instance_types.iter().cloned().collect::<HashSet<_>>();
        full_types.extend(self.class_types.clone());
        full_types
    }

    pub fn descendant_type_set(&self) -> HashSet<String> {
        self.segments
            .iter()
            .flat_map(|seg| {
                seg.descendant_type_set()
                    .union(&seg.class_types())
                    .cloned()
                    .collect::<HashSet<String>>()
            })
            .collect::<HashSet<String>>()
    }

    pub fn direct_descendant_type_set(&self) -> HashSet<String> {
        self.segments
            .iter()
            .flat_map(|seg| seg.class_types())
            .collect::<HashSet<String>>()
    }

    pub fn raw_segments_with_ancestors(&self) -> Vec<(Token, Vec<PathStep>)> {
        todo!()
    }

    pub fn source_fixes(&self) -> Vec<SourceFix> {
        match self.is_raw() {
            true => self.source_fixes.clone().unwrap_or_default(),
            false => self
                .segments
                .iter()
                .flat_map(|s| s.source_fixes())
                .collect(),
        }
    }

    pub fn first_non_whitespace_segment_raw_upper(&self) -> Option<String> {
        self.raw_segments().iter().find_map(|seg| {
            if !seg.raw_upper().trim().is_empty() {
                Some(seg.raw_upper().clone())
            } else {
                None
            }
        })
    }

    pub fn is_templated(&self) -> bool {
        let pos_marker = self.pos_marker.clone().expect("PositionMarker must be set");
        pos_marker.source_slice.start != pos_marker.source_slice.stop && !pos_marker.is_literal()
    }

    pub fn get_type(&self) -> String {
        self.token_type.clone()
    }

    pub fn is_type(&self, seg_types: &[&str]) -> bool {
        if self
            .instance_types
            .iter()
            .any(|s| seg_types.contains(&s.as_str()))
        {
            return true;
        }
        self.class_is_type(seg_types)
    }

    pub fn get_raw_segments(&self) -> Vec<Token> {
        todo!()
    }

    pub fn raw_trimmed(&self) -> String {
        let mut raw_buff = self.raw.clone();

        // Trim start sequences
        if let Some(trim_start) = &self.trim_start {
            for seq in trim_start {
                raw_buff = raw_buff.strip_prefix(seq).unwrap_or(&raw_buff).to_string();
            }
        }

        // Trim specified characters from both ends
        if let Some(trim_chars) = &self.trim_chars {
            raw_buff = self.raw.clone(); // Reset raw_buff before trimming chars

            for seq in trim_chars {
                while raw_buff.starts_with(seq) {
                    raw_buff = raw_buff.strip_prefix(seq).unwrap_or(&raw_buff).to_string();
                }
                while raw_buff.ends_with(seq) {
                    raw_buff = raw_buff.strip_suffix(seq).unwrap_or(&raw_buff).to_string();
                }
            }
        }

        raw_buff
    }

    fn _raw_normalized(&self) -> String {
        todo!()
    }

    pub fn raw_normalized(&self) -> String {
        todo!()
    }

    pub fn stringify(&self, ident: usize, tabsize: usize, code_only: bool) -> String {
        let mut buff = String::new();
        let preface = self.preface(ident, tabsize);
        writeln!(buff, "{}", preface).unwrap();

        if !code_only && self.comment_separate && !self.comments().is_empty() {
            if !self.comments().is_empty() {
                writeln!(buff, "{}Comments:", " ".repeat((ident + 1) * tabsize)).unwrap();
                for seg in &self.comments() {
                    let segment_string = seg.stringify(ident + 2, tabsize, code_only);
                    buff.push_str(&segment_string);
                }
            }

            if !self.non_comments().is_empty() {
                writeln!(buff, "{}Code:", " ".repeat((ident + 1) * tabsize)).unwrap();
                for seg in &self.non_comments() {
                    let segment_string = seg.stringify(ident + 2, tabsize, code_only);
                    buff.push_str(&segment_string);
                }
            }
        } else {
            for seg in &self.segments {
                if !code_only || seg.is_code {
                    let segment_string = seg.stringify(ident + 1, tabsize, code_only);
                    buff.push_str(&segment_string);
                }
            }
        }

        buff
    }

    pub fn edit(&self, raw: Option<String>, source_fixes: Option<Vec<SourceFix>>) -> Self {
        Self {
            raw: raw.unwrap_or(self.raw.clone()),
            source_fixes: Some(source_fixes.unwrap_or(self.source_fixes())),
            uuid: Uuid::new_v4().as_u128(),
            ..self.clone()
        }
    }

    // pub fn _get_raw_segment_kwargs(&self) -> HashMap<String, _> {
    //     let kwargs = HashMap::new();
    //     kwargs.insert("quoted_value", self.quoted_value);
    //     kwargs.insert("escape_replacements", vec![self.escape_replacement]);
    //     kwargs
    // }

    pub fn iter_unparseables(&self) -> Vec<Token> {
        self.segments
            .iter()
            .flat_map(|s| s.iter_unparseables())
            .collect()
    }

    pub fn set_parent(&mut self, parent: Arc<Token>, idx: usize) {
        self.parent = Some(Arc::downgrade(&parent));
        self.parent_idx = Some(idx);
    }

    pub fn class_is_type(&self, seg_types: &[&str]) -> bool {
        let seg_hash: HashSet<&str> = seg_types.iter().cloned().collect();
        !self
            .class_types
            .iter()
            .filter(|s| seg_hash.contains(s.as_str()))
            .collect::<Vec<_>>()
            .is_empty()
    }

    pub fn count_segments(&self, raw_only: bool) -> usize {
        if self.is_raw() {
            1
        } else {
            let self_count = if raw_only { 0 } else { 1 };
            self.segments
                .iter()
                .fold(0, |acc, s| acc + s.count_segments(raw_only) + self_count)
        }
    }

    pub fn is_raw(&self) -> bool {
        self.segments.is_empty()
    }
    pub fn block_type(&self) -> Option<String> {
        self.block_type.clone()
    }

    pub fn recursive_crawl(
        &self,
        seg_types: &[&str],
        recurse_into: bool,
        no_recursive_seg_type: Option<&[&str]>,
        allow_self: bool,
    ) -> Vec<Token> {
        let mut results = Vec::new();

        // If recurse_into is False and this matches, don't recurse
        if !recurse_into && self.is_type(seg_types) {
            if allow_self {
                results.push(self.clone());
            }
            return results;
        }

        // Check if self matches the given segment types
        if allow_self && self.is_type(seg_types) {
            results.push(self.clone());
        }

        // Convert no_recursive_seg_type to HashSet for efficient lookups
        let no_recursive_set: HashSet<&str> = no_recursive_seg_type
            .unwrap_or(&[])
            .iter()
            .cloned()
            .collect();

        // Recursively process child segments
        for seg in &self.segments {
            if no_recursive_set.contains(seg.token_type.as_str()) {
                continue;
            }
            results.extend(seg.recursive_crawl(
                seg_types,
                recurse_into,
                no_recursive_seg_type,
                true,
            ));
        }

        results
    }

    pub fn path_to(self, other: Self) -> Vec<PathStep> {
        // Return empty if they are the same segment.
        if self == other {
            return vec![];
        }

        // If there are no child segments, return empty.
        if self.segments.is_empty() {
            return vec![];
        }

        // Identifying the highest parent we can using any preset parent values.
        let mut midpoint = other.clone();
        let mut lower_path = Vec::new();

        while let Some(weak_parent) = &midpoint.parent.clone().as_ref() {
            if let Some(parent) = weak_parent.upgrade() {
                let parent_idx = midpoint.parent_idx.expect("Parent index must be set.");

                lower_path.push(PathStep {
                    segment: Arc::clone(&parent),
                    idx: parent_idx,
                    len: parent.segments.len(),
                    code_idxs: parent.code_indices().clone(),
                });

                midpoint = Arc::unwrap_or_clone(parent);
                if midpoint == self {
                    break;
                }
            } else {
                break;
            }
        }

        // Reverse the path so far
        lower_path.reverse();

        // If we have already found the parent, return.
        if midpoint == self {
            return lower_path;
        }
        // If we've gone all the way up to the file segment, return empty.
        if midpoint.class_is_type(&["file"]) {
            return vec![];
        }
        // Check if midpoint is within self's range.
        if !(self.get_start_loc() <= midpoint.get_start_loc()
            && midpoint.get_start_loc() <= self.get_end_loc())
        {
            return vec![];
        }

        // Now, work downward from `self` toward `midpoint`.
        for (idx, seg) in self.segments.clone().iter().enumerate() {
            // Set the parent if it's not already set.
            let seg = seg.clone();
            seg.clone().set_parent(Arc::new(self.clone()), idx);

            let step = PathStep {
                segment: Arc::new(self.clone()),
                idx,
                len: self.segments.clone().len(),
                code_idxs: self.code_indices().clone(),
            };

            // If we found the target
            if seg == midpoint {
                let mut result = vec![step];
                result.extend(lower_path);
                return result;
            }

            // Check recursively if a path exists
            let res = seg.path_to(midpoint.clone());
            if !res.is_empty() {
                let mut result = vec![step];
                result.extend(res);
                result.extend(lower_path);
                return result;
            }
        }

        // Not found.
        vec![]
    }

    pub fn get_start_loc(&self) -> (usize, usize) {
        self.pos_marker
            .clone()
            .expect("PositionMarker unset")
            .working_loc()
    }

    pub fn get_end_loc(&self) -> (usize, usize) {
        self.pos_marker
            .clone()
            .expect("PositionMarker unset")
            .working_loc_after(&self.raw)
    }

    pub fn recursive_crawl_all(&self, reverse: bool) -> Box<dyn Iterator<Item = &Token> + '_> {
        if reverse {
            Box::new(
                self.segments
                    .iter()
                    .rev()
                    .flat_map(move |seg| seg.recursive_crawl_all(reverse))
                    .chain(std::iter::once(self)),
            )
        } else {
            Box::new(
                std::iter::once(self).chain(
                    self.segments
                        .iter()
                        .flat_map(move |seg| seg.recursive_crawl_all(reverse)),
                ),
            )
        }
    }

    fn preface(&self, ident: usize, tabsize: usize) -> String {
        let padding = " ".repeat(ident * tabsize);
        let padded_type = format!("{}{}{}:", padding, self.preface_modifier, self.get_type());

        let pos = self.pos_marker.clone();
        let suffix = self.suffix.clone();

        let preface = format!(
            "{:<20}|{:<60} {}",
            pos.clone()
                .expect("PositionMarker unset")
                .to_source_string(),
            padded_type,
            suffix
        );

        preface.trim_end().to_string()
    }

    pub fn to_tuple(
        &self,
        code_only: Option<bool>,
        show_raw: Option<bool>,
        include_meta: Option<bool>,
    ) -> TupleSerialisedSegment {
        let code_only = code_only.unwrap_or_default();
        let show_raw = show_raw.unwrap_or_default();
        let include_meta = include_meta.unwrap_or_default();
        // If `show_raw` is true and there are no child segments, return (type, raw)
        if show_raw && self.segments.is_empty() {
            return TupleSerialisedSegment::Str(self.get_type(), self.raw.clone());
        }

        // Determine filtering criteria for child segments
        let filtered_segments: Vec<TupleSerialisedSegment> = self
            .segments
            .iter()
            .filter(|seg| {
                if code_only {
                    seg.is_code && !seg.is_meta
                } else {
                    include_meta || !seg.is_meta
                }
            })
            .map(|seg| seg.to_tuple(Some(code_only), Some(show_raw), Some(include_meta)))
            .collect();

        TupleSerialisedSegment::Nested(self.get_type(), filtered_segments)
    }

    pub fn copy(
        &self,
        segments: Option<Vec<Token>>,
        parent: Option<Arc<Token>>,
        parent_idx: Option<usize>,
    ) -> Token {
        let mut new_segment = self.clone();
        new_segment.parent = parent.as_ref().map(Arc::downgrade);
        new_segment.parent_idx = parent_idx;

        if let Some(ref segs) = segments {
            new_segment.segments = segs.clone();
        } else {
            new_segment.segments = self
                .segments
                .iter()
                .enumerate()
                .map(|(idx, seg)| {
                    seg.copy(None, Some(Arc::new(new_segment.clone())), Some(idx))
                })
                .collect();
        }

        new_segment
    }

    pub fn position_segments(segments: &[Token], parent_pos: PositionMarker) -> Vec<Token> {
        assert!(
            !segments.is_empty(),
            "position_segments called on empty sequence."
        );
        let mut line_no = parent_pos.working_line_no;
        let mut line_pos = parent_pos.working_line_pos;

        let mut segment_buffer = Vec::new();

        for (idx, segment) in segments.iter().enumerate() {
            let old_position = segment.pos_marker.clone();
            let mut new_position = segment.pos_marker.clone();

            // If position is missing, try to infer it
            if new_position.is_none() {
                let mut start_point = None;
                if idx > 0 {
                    let prev_seg: &Token = &segment_buffer[idx - 1];
                    if let Some(ref pos_marker) = prev_seg.pos_marker {
                        start_point = Some(pos_marker.end_point_marker());
                    }
                } else {
                    start_point = Some(parent_pos.start_point_marker());
                }

                // Search forward for the end point
                let mut end_point = None;
                for fwd_seg in &segments[idx + 1..] {
                    if let Some(ref pos_marker) = fwd_seg.pos_marker {
                        end_point = Some(pos_marker.start_point_marker());
                        break;
                    }
                }

                new_position = match (start_point, end_point) {
                    (Some(start), Some(end)) if start != end => {
                        Some(PositionMarker::from_points(&start, &end))
                    }
                    (Some(start), _) => Some(start),
                    (_, Some(end)) => Some(end),
                    _ => panic!("Unable to position new segment"),
                };
            }

            let new_position = new_position.expect("Position should be assigned");
            let new_position = new_position.with_working_position(line_no, line_pos);
            let (new_line_no, new_line_pos) =
                new_position.infer_next_position(&segment.raw, line_no, line_pos);
            line_no = new_line_no;
            line_pos = new_line_pos;

            // If position changed, recursively process child segments before copying
            let new_segment =
                if !segment.segments.is_empty() && old_position != Some(new_position.clone()) {
                    let child_segments =
                        Token::position_segments(&segment.segments, new_position.clone());
                    segment.copy(Some(child_segments), None, None)
                } else {
                    segment.copy(None, None, None)
                };

            segment_buffer.push(new_segment);
        }

        segment_buffer
    }

    // /// Simplifies the structure of the token recursively for serialization.
    // pub fn structural_simplify(&self) -> HashMap<String, Option<serde_json::Value>> {
    //     let mut result = HashMap::new();
    //     let key = self.get_type();

    //     if self.segments.is_empty() {
    //         // If there are no child segments, return the raw value.
    //         result.insert(key, Some(serde_json::Value::String(self.raw.clone())));
    //     } else {
    //         // Simplify all child segments recursively.
    //         let mut child_results = Vec::new();
    //         for segment in &self.segments {
    //             child_results.push(serde_json::Value::Object(
    //                 segment.structural_simplify(),
    //             ));
    //         }

    //         // Check for duplicate keys in child results.
    //         let mut subkeys = Vec::new();
    //         for child in &child_results {
    //             if let serde_json::Value::Object(map) = child {
    //                 subkeys.extend(map.keys().cloned());
    //             }
    //         }

    //         if subkeys.len() != subkeys.iter().collect::<std::collections::HashSet<_>>().len() {
    //             // If there are duplicate keys, use a list of child objects.
    //             result.insert(key, Some(serde_json::Value::Array(child_results)));
    //         } else {
    //             // Otherwise, merge child objects into a single map.
    //             let mut merged_map = HashMap::new();
    //             for child in child_results {
    //                 if let serde_json::Value::Object(map) = child {
    //                     for (k, v) in map {
    //                         merged_map.insert(k, v);
    //                     }
    //                 }
    //             }
    //             result.insert(key, Some(serde_json::Value::Object(merged_map)));
    //         }
    //     }

    //     result
    // }
}

#[cfg(test)]
mod tests {
    use crate::matcher::TokenGenerator;
    use crate::slice::Slice;
    use crate::templater::templatefile::TemplatedFile;

    use super::*;

    /// Roughly generate test segments.
    ///
    /// This function isn't totally robust, but good enough
    /// for testing. Use with caution.
    fn generate_test_segments(elems: &[&str]) -> Vec<Token> {
        let mut buff = vec![];
        let templated_file = Arc::new(TemplatedFile::from(
            elems.iter().cloned().collect::<String>(),
        ));
        let mut idx = 0;

        for elem in elems {
            let elem = &**elem;
            if elem == "<indent>" {
                buff.push(Token::indent_token(
                    PositionMarker::from_point(idx, idx, &templated_file, None, None),
                    false,
                    None,
                    HashSet::new(),
                ));
                continue;
            } else if elem == "<dedent>" {
                buff.push(Token::dedent_token(
                    PositionMarker::from_point(idx, idx, &templated_file, None, None),
                    false,
                    None,
                    HashSet::new(),
                ));
                continue;
            }
            let (token_fn, instance_types): (TokenGenerator, Vec<String>) = match elem {
                " " | "\t" => (Token::whitespace_token_compat, Vec::new()),
                "\n" => (Token::newline_token_compat, Vec::new()),
                "(" => (
                    Token::symbol_token_compat,
                    Vec::from_iter(["start_bracket".to_string()]),
                ),
                ")" => (
                    Token::symbol_token_compat,
                    Vec::from_iter(["end_bracket".to_string()]),
                ),
                "[" => (
|
||||
Token::symbol_token_compat,
|
||||
Vec::from_iter(["start_square_bracket".to_string()]),
|
||||
),
|
||||
"]" => (
|
||||
Token::symbol_token_compat,
|
||||
Vec::from_iter(["end_square_bracket".to_string()]),
|
||||
),
|
||||
s if s.starts_with("--") => (
|
||||
Token::comment_token_compat,
|
||||
Vec::from_iter(["inline_comment".to_string()]),
|
||||
),
|
||||
s if s.starts_with("\"") => (
|
||||
Token::code_token_compat,
|
||||
Vec::from_iter(["double_quote".to_string()]),
|
||||
),
|
||||
s if s.starts_with("'") => (
|
||||
Token::code_token_compat,
|
||||
Vec::from_iter(["single_quote".to_string()]),
|
||||
),
|
||||
_ => (Token::code_token_compat, Vec::new()),
|
||||
};
|
||||
|
||||
buff.push(token_fn(
|
||||
elem.into(),
|
||||
PositionMarker::new(
|
||||
Slice {
|
||||
start: idx,
|
||||
stop: idx + elem.len(),
|
||||
},
|
||||
Slice {
|
||||
start: idx,
|
||||
stop: idx + elem.len(),
|
||||
},
|
||||
&templated_file,
|
||||
None,
|
||||
None,
|
||||
),
|
||||
HashSet::new(),
|
||||
instance_types,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
));
|
||||
idx += elem.len();
|
||||
}
|
||||
|
||||
buff
|
||||
}
|
||||
|
||||
fn raw_segments() -> Vec<Token> {
|
||||
generate_test_segments(&["foobar", ".barfoo"])
|
||||
}
|
||||
|
||||
#[test]
|
||||
/// Test niche case of calling get_raw_segments on a raw segment.
|
||||
fn test_parser_raw_get_raw_segments() {
|
||||
for s in raw_segments() {
|
||||
assert_eq!(s.raw_segments(), [s]);
|
||||
}
|
||||
}
|
||||
}
|
||||
10
sqlfluffrs/src/token/path.rs
Normal file
@@ -0,0 +1,10 @@
|
||||
use super::Token;
|
||||
use std::sync::Arc;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PathStep {
|
||||
pub segment: Arc<Token>,
|
||||
pub idx: usize,
|
||||
pub len: usize,
|
||||
pub code_idxs: Vec<usize>,
|
||||
}
|
||||
567
sqlfluffrs/src/token/python.rs
Normal file
@@ -0,0 +1,567 @@
use std::{
    fmt::{Debug, Display},
    sync::Arc,
};

use hashbrown::HashSet;
use pyo3::{
    prelude::*,
    types::{PyDict, PyString, PyTuple, PyType},
};
use uuid::Uuid;

use crate::{
    marker::python::{PyPositionMarker, PySqlFluffPositionMarker},
    regex::RegexModeGroup,
};

use super::{path::PathStep, SourceFix, Token, TupleSerialisedSegment};

#[pyclass(name = "RsSourceFix")]
#[repr(transparent)]
#[derive(Clone)]
pub struct PySourceFix(pub SourceFix);

impl From<PySourceFix> for SourceFix {
    fn from(value: PySourceFix) -> SourceFix {
        value.0
    }
}

impl From<SourceFix> for PySourceFix {
    fn from(value: SourceFix) -> Self {
        Self(value)
    }
}

#[pyclass(name = "RsPathStep")]
#[repr(transparent)]
#[derive(Clone)]
pub struct PyPathStep(pub PathStep);

impl From<PyPathStep> for PathStep {
    fn from(value: PyPathStep) -> Self {
        value.0
    }
}

impl From<PathStep> for PyPathStep {
    fn from(value: PathStep) -> Self {
        Self(value)
    }
}

#[pyclass(name = "RsTupleSerialisedSegment")]
#[repr(transparent)]
#[derive(Clone)]
pub struct PyTupleSerialisedSegment(pub TupleSerialisedSegment);

impl PyTupleSerialisedSegment {
    pub fn to_py_tuple<'py>(&self, py: Python<'py>) -> Result<Bound<'py, PyTuple>, PyErr> {
        match &self.0 {
            TupleSerialisedSegment::Str(segment_type, raw_value) => {
                PyTuple::new(py, [segment_type, raw_value])
            }
            TupleSerialisedSegment::Nested(segment_type, segments) => {
                let py_segment_type = PyString::new(py, segment_type);
                let py_segments: Vec<_> = segments
                    .iter()
                    .map(|s| {
                        PyTupleSerialisedSegment::to_py_tuple(
                            &PyTupleSerialisedSegment(s.clone()),
                            py,
                        )
                    })
                    .collect::<Result<Vec<_>, _>>()?;
                let pt_segments_tuple = PyTuple::new(py, &py_segments)?;

                PyTuple::new(
                    py,
                    &[py_segment_type.into_any(), pt_segments_tuple.into_any()],
                )
            }
        }
    }
}

impl From<PyTupleSerialisedSegment> for TupleSerialisedSegment {
    fn from(value: PyTupleSerialisedSegment) -> Self {
        value.0
    }
}

impl From<TupleSerialisedSegment> for PyTupleSerialisedSegment {
    fn from(value: TupleSerialisedSegment) -> Self {
        Self(value)
    }
}

#[pyclass(name = "RsToken", weakref, module = "sqlfluffrs")]
#[repr(transparent)]
#[derive(Debug, Clone)]
pub struct PyToken(pub Token);

#[pymethods]
impl PyToken {
    #[getter]
    pub fn raw(&self) -> String {
        self.0.raw.to_string()
    }

    pub fn raw_trimmed(&self) -> String {
        self.0.raw_trimmed()
    }

    #[getter]
    pub fn pos_marker(&self) -> Option<PyPositionMarker> {
        self.0.pos_marker.clone().map(PyPositionMarker)
    }

    #[setter]
    pub fn set_pos_marker(&mut self, value: Option<PySqlFluffPositionMarker>) {
        self.0.pos_marker = value.map(Into::into);
    }

    pub fn get_type(&self) -> String {
        self.0.get_type()
    }

    #[getter(r#type)]
    pub fn type_(&self) -> String {
        self.0.get_type()
    }

    #[getter]
    pub fn is_templated(&self) -> bool {
        self.0.is_templated()
    }

    #[getter]
    pub fn is_code(&self) -> bool {
        self.0.is_code
    }

    #[getter]
    pub fn is_meta(&self) -> bool {
        self.0.is_meta
    }

    #[getter]
    pub fn source_str(&self) -> Option<String> {
        self.0.source_str.clone()
    }

    #[getter]
    pub fn block_type(&self) -> Option<String> {
        self.0.block_type()
    }

    #[getter]
    pub fn block_uuid(&self) -> Option<Uuid> {
        self.0.block_uuid
    }

    #[getter]
    pub fn cache_key(&self) -> String {
        use std::hash::{Hash, Hasher};
        use std::collections::hash_map::DefaultHasher;

        let mut hasher = DefaultHasher::new();
        self.0.token_type.hash(&mut hasher);
        for t in &self.0.instance_types {
            t.hash(&mut hasher);
        }
        format!("{:016x}", hasher.finish())
    }

    #[getter]
    pub fn trim_start(&self) -> Option<Vec<String>> {
        self.0.trim_start.clone()
    }

    #[getter]
    pub fn trim_chars(&self) -> Option<Vec<String>> {
        self.0.trim_chars.clone()
    }

    #[pyo3(signature = (raw_only = false))]
    pub fn count_segments(&self, raw_only: Option<bool>) -> usize {
        self.0.count_segments(raw_only.unwrap_or_default())
    }

    #[pyo3(signature = (*seg_type))]
    pub fn is_type(&self, seg_type: &Bound<'_, PyTuple>) -> bool {
        let seg_strs = seg_type
            .extract::<Vec<String>>()
            .expect("args should be all strings");
        self.0
            .is_type(&seg_strs.iter().map(String::as_str).collect::<Vec<&str>>())
    }

    #[getter]
    pub fn indent_val(&self) -> i32 {
        self.0.indent_value
    }

    #[getter]
    pub fn is_whitespace(&self) -> bool {
        self.0.is_whitespace
    }

    pub fn is_raw(&self) -> bool {
        self.0.is_raw()
    }

    #[getter]
    pub fn is_comment(&self) -> bool {
        self.0.is_comment
    }

    #[getter]
    pub fn class_types(&self) -> HashSet<String> {
        self.0.class_types()
    }

    #[getter]
    pub fn instance_types(&self) -> Vec<String> {
        self.0.instance_types.clone()
    }

    #[getter]
    pub fn preface_modifier(&self) -> String {
        self.0.preface_modifier.clone()
    }

    #[getter]
    pub fn source_fixes(&self) -> Vec<PySourceFix> {
        self.0.source_fixes().into_iter().map(Into::into).collect()
    }

    #[getter]
    pub fn _source_fixes(&self) -> Option<Vec<PySourceFix>> {
        self.0
            .source_fixes
            .clone()
            .map(|sf| sf.into_iter().map(Into::into).collect())
    }

    #[pyo3(signature = (*seg_type))]
    pub fn class_is_type(&self, seg_type: &Bound<'_, PyTuple>) -> bool {
        let seg_strs = seg_type
            .extract::<Vec<String>>()
            .expect("args should be all strings");
        self.0
            .class_is_type(&seg_strs.iter().map(String::as_str).collect::<Vec<&str>>())
    }

    #[getter]
    pub fn first_non_whitespace_segment_raw_upper(&self) -> Option<String> {
        self.0.first_non_whitespace_segment_raw_upper()
    }

    #[getter]
    pub fn raw_upper(&self) -> String {
        self.0.raw_upper()
    }

    pub fn invalidate_caches(&self) {}

    #[getter]
    pub fn uuid(&self) -> u128 {
        self.0.uuid
    }

    #[getter]
    pub fn descendant_type_set(&self) -> HashSet<String> {
        self.0.descendant_type_set()
    }

    #[pyo3(signature = (*seg_type, recurse_into = true, no_recursive_seg_type = None, allow_self = true))]
    pub fn recursive_crawl(
        &self,
        seg_type: &Bound<'_, PyTuple>,
        recurse_into: bool,
        no_recursive_seg_type: Option<Bound<'_, PyAny>>,
        allow_self: bool,
    ) -> Vec<PyToken> {
        let seg_type = seg_type
            .extract::<Vec<String>>()
            .expect("args should be all strings");
        let temp: Option<Vec<String>> = match no_recursive_seg_type {
            Some(py_any) => {
                if let Ok(single_str) = py_any.extract::<String>() {
                    Some(vec![single_str]) // Convert a single string into a Vec<String>
                } else if let Ok(list_of_str) = py_any.extract::<Vec<String>>() {
                    Some(list_of_str) // Already a Vec<String>, return as is
                } else {
                    Some(vec![]) // If it's neither, fall back to an empty vector
                }
            }
            None => None, // If None, no types are excluded from recursion
        };
        let no_recursive_seg_type: Option<Vec<&str>> = temp
            .as_ref()
            .map(|vec| vec.iter().map(String::as_str).collect());

        self.0
            .recursive_crawl(
                &seg_type.iter().map(String::as_str).collect::<Vec<&str>>(),
                recurse_into,
                no_recursive_seg_type.as_deref(),
                allow_self,
            )
            .into_iter()
            .map(Into::into)
            .collect()
    }
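On the Python side this binding mirrors the existing `BaseSegment.recursive_crawl` API. A minimal sketch of the call shape (hedged: `token` here stands for any `RsToken` produced by the Rust lexer, and `"statement"` is just an illustrative segment type):

# Sketch only: `token` is assumed to be an RsToken from the Rust lexer.
for seg in token.recursive_crawl("statement", recurse_into=False):
    print(seg.get_type(), seg.raw)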
    pub fn recursive_crawl_all(&self, reverse: bool) -> Vec<PyToken> {
        self.0
            .recursive_crawl_all(reverse)
            .map(|t| t.clone().into())
            .collect()
    }

    #[getter]
    pub fn segments(&self) -> Vec<PyToken> {
        self.0
            .segments
            .clone()
            .into_iter()
            .map(Into::into)
            .collect()
    }

    pub fn path_to(&self, other: PyToken) -> Vec<PyPathStep> {
        self.0
            .clone()
            .path_to(other.into())
            .into_iter()
            .map(Into::into)
            .collect()
    }

    pub fn get_start_loc(&self) -> (usize, usize) {
        self.0.get_start_loc()
    }

    pub fn get_end_loc(&self) -> (usize, usize) {
        self.0.get_end_loc()
    }

    #[getter]
    pub fn raw_segments(&self) -> Vec<PyToken> {
        self.0.raw_segments().into_iter().map(Into::into).collect()
    }

    pub fn _get_raw_segment_kwargs<'py>(&self, py: Python<'py>) -> Bound<'py, PyDict> {
        let dict = PyDict::new(py);
        if let Some(ref quoted_value) = self.0.quoted_value {
            dict.set_item("quoted_value", quoted_value.clone()).unwrap();
        } else {
            dict.set_item("quoted_value", py.None()).unwrap();
        }
        if let Some(ref escape_replacement) = self.0.escape_replacement {
            dict.set_item("escape_replacements", vec![escape_replacement])
                .unwrap();
        } else {
            dict.set_item("escape_replacements", py.None()).unwrap();
        }
        dict
    }

    #[getter]
    pub fn quoted_value(&self, py: Python<'_>) -> Option<(String, Py<PyAny>)> {
        self.0.quoted_value.clone().map(|(s, g)| {
            let py_group: Py<PyAny> = match g {
                RegexModeGroup::Index(idx) => idx.into_pyobject(py).unwrap().into(),
                RegexModeGroup::Name(name) => name.into_pyobject(py).unwrap().into(),
            };
            (s, py_group)
        })
    }

    #[getter]
    pub fn escape_replacements(&self) -> Option<Vec<(String, String)>> {
        if self.0.escape_replacement.is_none() {
            None
        } else {
            Some(vec![self.0.escape_replacement.clone().unwrap()])
        }
    }

    pub fn set_parent(&self, parent: &Bound<'_, PyAny>, idx: usize) -> PyResult<()> {
        let parent: Arc<Token> = parent
            .extract()
            .map(|t: PySqlFluffToken| Arc::new(t.0 .0))?;
        let mut inner = self.0.clone();
        inner.set_parent(parent, idx);
        Ok(())
    }

    pub fn get_parent(&self) -> Option<(PyToken, i32)> {
        None
    }

    pub fn iter_unparsables(&self) -> Vec<PyToken> {
        self.0
            .iter_unparseables()
            .into_iter()
            .map(Into::into)
            .collect()
    }

    #[pyo3(signature = (ident=0, tabsize=4, code_only=false))]
    pub fn stringify(
        &self,
        ident: Option<usize>,
        tabsize: Option<usize>,
        code_only: Option<bool>,
    ) -> String {
        self.0.stringify(
            ident.unwrap_or(0),
            tabsize.unwrap_or(4),
            code_only.unwrap_or_default(),
        )
    }

    #[pyo3(signature = (code_only=None, show_raw=None, include_meta=None))]
    pub fn to_tuple<'py>(
        &self,
        py: Python<'py>,
        code_only: Option<bool>,
        show_raw: Option<bool>,
        include_meta: Option<bool>,
    ) -> Result<Bound<'py, PyTuple>, PyErr> {
        PyTupleSerialisedSegment(self.0.to_tuple(code_only, show_raw, include_meta)).to_py_tuple(py)
    }

    // pub fn structural_simplify(&self) -> HashMap<String, Option<serde_json::Value>> {
    //     self.0
    //         .structural_simplify()
    //         .into_iter()
    //         .map(|(k, v)| (k, v.map(|v| serde_json::to_value(v).unwrap())))
    //         .collect()
    // }

    #[pyo3(signature = (segments=None, parent=None, parent_idx=None))]
    pub fn copy(
        &self,
        segments: Option<Vec<PySqlFluffToken>>,
        parent: Option<PySqlFluffToken>,
        parent_idx: Option<usize>,
    ) -> PyToken {
        PyToken(
            self.0.copy(
                segments.map(|s| s.into_iter().map(Into::into).collect()),
                parent
                    .as_ref()
                    .map(|parent_token| Arc::clone(&parent_token.0 .0.clone().into())),
                parent_idx,
            ),
        )
    }

    #[pyo3(signature = (raw=None, source_fixes=None))]
    pub fn edit(&self, raw: Option<String>, source_fixes: Option<Vec<PySourceFix>>) -> Self {
        Self(self.0.edit(
            raw,
            source_fixes.map(|sf| sf.into_iter().map(Into::into).collect()),
        ))
    }

    #[classmethod]
    pub fn position_segments<'py>(
        _cls: &Bound<'py, PyType>,
        py: Python<'py>,
        segments: Vec<PySqlFluffToken>,
        parent_pos: PySqlFluffPositionMarker,
    ) -> Result<Bound<'py, PyTuple>, PyErr> {
        let tokens = Token::position_segments(
            &segments
                .into_iter()
                .map(|s| s.into())
                .collect::<Vec<Token>>(),
            parent_pos.into(),
        );
        PyTuple::new(
            py,
            tokens.into_iter().map(Into::into).collect::<Vec<PyToken>>(),
        )
    }

    pub fn __repr__(&self) -> String {
        format!("{}", self)
    }
}

impl Display for PyToken {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{}", self.0)
    }
}

impl From<PyToken> for Token {
    fn from(value: PyToken) -> Token {
        value.0
    }
}

impl From<Token> for PyToken {
    fn from(value: Token) -> Self {
        Self(value)
    }
}

#[derive(IntoPyObject)]
pub struct PySqlFluffToken(pub PyToken);

impl<'py> FromPyObject<'py> for PySqlFluffToken {
    fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult<Self> {
        let raw = ob.getattr("raw")?.extract::<String>()?;
        let class_types = ob
            .getattr("_class_types")
            .unwrap_or(ob.getattr("class_types")?)
            .extract::<HashSet<String>>()?
            .into_iter()
            .map(|s| s.to_string())
            .collect::<HashSet<String>>();
        let instance_types = ob
            .getattr("instance_types")?
            .extract::<Vec<String>>()?
            .into_iter()
            .map(|s| s.to_string())
            .collect::<Vec<String>>();
        let segments = ob
            .getattr("segments")?
            .extract::<Vec<PySqlFluffToken>>()
            .map(|s| s.into_iter().map(Into::into).collect::<Vec<Token>>())?;
        let pos_marker = ob
            .getattr("pos_marker")?
            .extract::<PySqlFluffPositionMarker>()?;

        use crate::token::config::TokenConfig;
        Ok(Self(PyToken(Token::base_token(
            raw,
            pos_marker.into(),
            TokenConfig {
                class_types,
                instance_types,
                ..TokenConfig::default()
            },
            segments,
        ))))
    }
}

impl From<PySqlFluffToken> for Token {
    fn from(value: PySqlFluffToken) -> Token {
        value.0 .0
    }
}

impl From<Token> for PySqlFluffToken {
    fn from(value: Token) -> Self {
        Self(PyToken(value))
    }
}

@@ -17,6 +17,12 @@ if TYPE_CHECKING: # pragma: no cover
    from sqlfluff.core.parser import BaseSegment, PositionMarker
    from sqlfluff.core.rules import BaseRule, LintFix

try:
    from sqlfluffrs import RsSQLLexerError
except ImportError:
    ...


CheckTuple = tuple[str, int, int]
SerializedObject = dict[str, Union[str, int, bool, list["SerializedObject"]]]

@@ -181,6 +187,18 @@ class SQLLexError(SQLBaseError):
    _code = "LXR"
    _identifier = "lexing"

    @classmethod
    def from_rs_error(cls, rs_error: "RsSQLLexerError") -> "SQLLexError":
        """Create a SQLLexError from an RsSQLLexerError."""
        return cls(
            description=rs_error.desc,
            line_no=rs_error.line_no,
            line_pos=rs_error.line_pos,
            ignore=rs_error.ignore,
            fatal=rs_error.fatal,
            warning=rs_error.warning,
        )
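As the lexer wrapper later in this diff does, converting the Rust-side errors is a one-line comprehension (a sketch; `rs_errors` is an illustrative name for the error list returned from the Rust lexer):

# Sketch: rs_errors is the list of RsSQLLexerError values from the Rust lexer.
violations = [SQLLexError.from_rs_error(err) for err in rs_errors]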

class SQLParseError(SQLBaseError):
    """An error which occurred during parsing.

@@ -51,7 +51,7 @@ class ParsedVariant(NamedTuple):
        lexing_violations (:obj:`list` of :obj:`SQLLexError`): Any violations
            raised during the lexing phase.
        parsing_violations (:obj:`list` of :obj:`SQLParseError`): Any violations
            raised during the lexing phase.
            raised during the parsing phase.
    """

    templated_file: TemplatedFile

@@ -14,7 +14,13 @@ from sqlfluff.core.parser.grammar import (
    Ref,
    Sequence,
)
from sqlfluff.core.parser.lexer import Lexer, RegexLexer, StringLexer
from sqlfluff.core.parser.lexer import (
    LexerType,
    PyLexer,
    RegexLexer,
    StringLexer,
    get_lexer_class,
)
from sqlfluff.core.parser.markers import PositionMarker
from sqlfluff.core.parser.matchable import Matchable
from sqlfluff.core.parser.parser import Parser

@@ -52,6 +58,9 @@ from sqlfluff.core.parser.segments import (
)
from sqlfluff.core.parser.types import ParseMode

# Get the appropriate lexer class (PyRsLexer if available, otherwise PyLexer)
Lexer = get_lexer_class()

__all__ = (
    "BaseSegment",
    "SourceFix",

@@ -95,6 +104,8 @@ __all__ = (
    "RegexParser",
    "PositionMarker",
    "Lexer",
    "PyLexer",
    "LexerType",
    "StringLexer",
    "RegexLexer",
    "Parser",

@@ -15,7 +15,9 @@ from sqlfluff.core.parser.segments import (
    BaseSegment,
    Dedent,
    EndOfFile,
    ImplicitIndent,
    Indent,
    LiteralKeywordSegment,
    MetaSegment,
    RawSegment,
    TemplateLoop,

@@ -723,7 +725,7 @@ def _iter_segments(
    )


class Lexer:
class PyLexer:
    """The Lexer class actually does the lexing step."""

    def __init__(

@@ -825,7 +827,9 @@ class Lexer:
        return tuple(segment_buffer)

    @staticmethod
    def violations_from_segments(segments: tuple[RawSegment, ...]) -> list[SQLLexError]:
    def violations_from_segments(
        segments: tuple[RawSegment, ...],
    ) -> list[SQLLexError]:
        """Generate any lexing errors for any unlexables."""
        violations = []
        for segment in segments:

@@ -887,3 +891,87 @@ class Lexer:
            f"{template.templated_str[template_slice]!r}"
        )
        return templated_buff


try:
    from sqlfluffrs import RsLexer, RsToken

    def get_segment_type_map(base_class: type) -> dict[str, type[RawSegment]]:
        """Dynamically create a map of segment types to their subclasses."""
        segment_map = {}
        for subclass in base_class.__subclasses__():
            if subclass is LiteralKeywordSegment or subclass is ImplicitIndent:
                continue
            if (
                hasattr(subclass, "type") and subclass.type
            ):  # Ensure the subclass has a type
                segment_map[subclass.type] = subclass
            # Recursively add subclasses of subclasses
            segment_map.update(get_segment_type_map(subclass))
        return segment_map

    # Dynamically generate the segment_types map
    segment_types = get_segment_type_map(RawSegment)

    class PyRsLexer(RsLexer):
        """A wrapper around the sqlfluffrs lexer."""

        @staticmethod
        def _tokens_to_segments(
            tokens: list["RsToken"], py_template: TemplatedFile
        ) -> tuple[BaseSegment, ...]:
            """Convert tokens to segments."""
            return tuple(
                segment_types.get(token.type, RawSegment).from_rstoken(
                    token, py_template
                )
                for token in tokens
            )

        def lex(
            self, raw: Union[str, TemplatedFile]
        ) -> tuple[tuple[BaseSegment, ...], list[SQLLexError]]:
            """Take a string or TemplatedFile and return segments."""
            tokens, errors = self._lex(raw)
            first_token = tokens[0]
            assert first_token
            template = first_token.pos_marker.templated_file
            py_template = TemplatedFile(
                template.source_str,
                template.fname,
                template.templated_str,
                template.sliced_file,  # type: ignore
                template.raw_sliced,  # type: ignore
            )

            return (
                self._tokens_to_segments(tokens, py_template),
                [SQLLexError.from_rs_error(error) for error in errors],
            )

    _HAS_RUST_LEXER = True
    lexer_logger.info("Using sqlfluffrs lexer.")
except ImportError:
    PyRsLexer = None  # type: ignore[assignment, misc]
    _HAS_RUST_LEXER = False
    lexer_logger.info("sqlfluffrs lexer not present or failed to load.")


def get_lexer_class() -> type[Union[PyLexer, "PyRsLexer"]]:
    """Get the appropriate lexer class based on availability.

    Returns PyRsLexer if the Rust extension is available,
    otherwise returns PyLexer.

    This function provides a single point of lexer selection,
    making it easy to instantiate the correct lexer:

        Lexer = get_lexer_class()
        lexer = Lexer(config=config)

    Returns:
        The lexer class to use (PyRsLexer or PyLexer).
    """
    if _HAS_RUST_LEXER:
        return PyRsLexer
    return PyLexer
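A minimal usage sketch of the selection function above, following the interface the docstring describes (assumed here: `config` is an existing FluffConfig instance, and both lexer classes accept the same `config` keyword and expose the same `lex` method):

# Sketch: both lexer classes share this interface per the docstring above.
Lexer = get_lexer_class()
lexer = Lexer(config=config)  # `config` is an existing FluffConfig instance.
segments, violations = lexer.lex("SELECT 1 AS foo")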

@@ -3,13 +3,15 @@
This class is a construct to keep track of positions within a file.
"""

from collections.abc import Sequence
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Optional

from sqlfluff.core.helpers.slice import zero_slice

if TYPE_CHECKING:
    from sqlfluff.core.templaters import TemplatedFile  # pragma: no cover
if TYPE_CHECKING:  # pragma: no cover
    from sqlfluff.core.templaters import TemplatedFile
    from sqlfluffrs import RsPositionMarker


@dataclass(frozen=True)

@@ -124,7 +126,7 @@ class PositionMarker:

    @classmethod
    def from_child_markers(
        cls, *markers: Optional["PositionMarker"]
        cls, markers: Sequence[Optional["PositionMarker"]]
    ) -> "PositionMarker":
        """Create a parent marker from its children."""
        source_slice = slice(

@@ -249,3 +251,16 @@ class PositionMarker:
    def to_source_dict(self) -> dict[str, int]:
        """Serialise the source position."""
        return self.templated_file.source_position_dict_from_slice(self.source_slice)

    @classmethod
    def from_rs_position_marker(
        cls,
        rs_position_marker: "RsPositionMarker",
        templated_file: "TemplatedFile",
    ) -> "PositionMarker":
        """Create a PositionMarker from an RsPositionMarker."""
        return cls(
            source_slice=rs_position_marker.source_slice,
            templated_slice=rs_position_marker.templated_slice,
            templated_file=templated_file,
        )

@@ -195,7 +195,7 @@ class BaseSegment(metaclass=SegmentMetaclass):
        # If no pos given, work it out from the children.
        if all(seg.pos_marker for seg in segments):
            pos_marker = PositionMarker.from_child_markers(
                *(seg.pos_marker for seg in segments)
                [seg.pos_marker for seg in segments]
            )

        assert not hasattr(self, "parse_grammar"), "parse_grammar is deprecated."

@@ -1,7 +1,7 @@
"""Indent and Dedent classes."""

from collections.abc import Sequence
from typing import Optional
from typing import TYPE_CHECKING, Optional
from uuid import UUID

from sqlfluff.core.parser.context import ParseContext

@@ -11,6 +11,9 @@ from sqlfluff.core.parser.segments.base import BaseSegment
from sqlfluff.core.parser.segments.raw import RawSegment, SourceFix
from sqlfluff.core.templaters.base import TemplatedFile

if TYPE_CHECKING:  # pragma: no cover
    from sqlfluffrs import RsToken


class MetaSegment(RawSegment):
    """A segment which is empty but indicates where something should be."""

@@ -80,6 +83,19 @@ class MetaSegment(RawSegment):
        """
        return None

    @classmethod
    def from_rstoken(
        cls,
        token: "RsToken",
        tf: "TemplatedFile",
    ) -> "MetaSegment":
        """Create a MetaSegment from an RsToken."""
        segment = cls(
            pos_marker=PositionMarker.from_rs_position_marker(token.pos_marker, tf),
            block_uuid=token.block_uuid,
        )
        return segment


class EndOfFile(MetaSegment):
    """A meta segment to indicate the end of the file."""

@@ -270,3 +286,14 @@ class TemplateSegment(MetaSegment):
            source_fixes=sf,
            block_uuid=self.block_uuid,
        )

    @classmethod
    def from_rstoken(cls, token: "RsToken", tf: TemplatedFile) -> "TemplateSegment":
        """Create a TemplateSegment from a token."""
        segment = cls(
            pos_marker=PositionMarker.from_rs_position_marker(token.pos_marker, tf),
            source_str=token.source_str,
            block_type=token.block_type,
            block_uuid=token.block_uuid,
        )
        return segment

@@ -4,7 +4,7 @@ This is designed to be the root segment, without
any children, and the output of the lexer.
"""

from typing import Any, Callable, Optional, Union, cast
from typing import TYPE_CHECKING, Any, Callable, Optional, Union, cast
from uuid import uuid4

import regex as re

@@ -12,6 +12,10 @@ import regex as re
from sqlfluff.core.parser.markers import PositionMarker
from sqlfluff.core.parser.segments.base import BaseSegment, SourceFix

if TYPE_CHECKING:  # pragma: no cover
    from sqlfluff.core.templaters import TemplatedFile
    from sqlfluffrs import RsToken


class RawSegment(BaseSegment):
    """This is a segment without any subsegments."""

@@ -299,6 +303,26 @@ class RawSegment(BaseSegment):
            **new_segment_kwargs,
        )

    @classmethod
    def from_rstoken(
        cls,
        token: "RsToken",
        tf: "TemplatedFile",
    ) -> "RawSegment":
        """Create a RawSegment from an RsToken."""
        segment = cls(
            raw=token.raw,
            pos_marker=PositionMarker.from_rs_position_marker(token.pos_marker, tf),
            instance_types=tuple(token.instance_types),
            trim_start=token.trim_start,
            trim_chars=token.trim_chars,
            source_fixes=token.source_fixes,
            uuid=token.uuid,
            quoted_value=token.quoted_value,
            escape_replacements=token.escape_replacements,
        )
        return segment


__all__ = [
    "PositionMarker",
Some files were not shown because too many files have changed in this diff.