Files
sqlfluff/examples/01_basic_api_usage.py
Jay Chung eb6b96ba64 Use ruff rule I replace isort (#5289)
Co-authored-by: Alan Cruickshank <alanmcruickshank@gmail.com>
2023-10-09 15:20:09 +00:00

82 lines
2.8 KiB
Python

"""This is an example of how to use the simple sqlfluff api."""
from typing import Any, Dict, Iterator, List, Union
import sqlfluff
# -------- LINTING ----------
my_bad_query = "SeLEct *, 1, blah as fOO from mySchema.myTable"
# Lint the given string and return an array of violations in JSON representation.
lint_result = sqlfluff.lint(my_bad_query, dialect="bigquery")
# lint_result =
# [
# {
# "code": "CP01",
# "line_no": 1,
# "line_pos": 1,
# "description": "Keywords must be consistently upper case.",
# }
# ...
# ]
# -------- FIXING ----------
# Fix the given string and get a string back which has been fixed.
fix_result_1 = sqlfluff.fix(my_bad_query, dialect="bigquery")
# fix_result_1 = 'SELECT *, 1, blah AS foo FROM myschema.mytable\n'
# We can also fix just specific rules.
fix_result_2 = sqlfluff.fix(my_bad_query, rules=["CP01"])
# fix_result_2 = 'SELECT *, 1, blah AS fOO FROM mySchema.myTable'
# Or a subset of rules...
fix_result_3 = sqlfluff.fix(my_bad_query, rules=["CP01", "CP02"])
# fix_result_3 = 'SELECT *, 1, blah AS fOO FROM myschema.mytable'
# -------- PARSING ----------
# Parse the given string and return a JSON representation of the parsed tree.
parse_result = sqlfluff.parse(my_bad_query)
# parse_result = {'file': {'statement': {...}, 'newline': '\n'}}
# This JSON structure can then be parsed as required.
# An example usage is shown below:
def get_json_segment(
parse_result: Dict[str, Any], segment_type: str
) -> Iterator[Union[str, Dict[str, Any], List[Dict[str, Any]]]]:
"""Recursively search JSON parse result for specified segment type.
Args:
parse_result (Dict[str, Any]): JSON parse result from `sqlfluff.fix`.
segment_type (str): The segment type to search for.
Yields:
Iterator[Union[str, Dict[str, Any], List[Dict[str, Any]]]]:
Retrieves children of specified segment type as either a string for a raw
segment or as JSON or an array of JSON for non-raw segments.
"""
for k, v in parse_result.items():
if k == segment_type:
yield v
elif isinstance(v, dict):
yield from get_json_segment(v, segment_type)
elif isinstance(v, list):
for s in v:
yield from get_json_segment(s, segment_type)
# e.g. Retrieve array of JSON for table references.
table_references = list(get_json_segment(parse_result, "table_reference"))
print(table_references)
# [[{'identifier': 'mySchema'}, {'dot': '.'}, {'identifier': 'myTable'}]]
# Retrieve raw table name from last identifier in the table reference.
for table_reference in table_references:
table_name = list(get_json_segment(parse_result, "naked_identifier"))[-1]
print(f"table_name: {table_name}")
# table_name: myTable