TSQL: allow special characters in variable and table names (#7262)

This commit is contained in:
Peter Budai
2025-11-27 22:24:53 +01:00
committed by GitHub
parent fe851261dc
commit 4ae92ea334
4 changed files with 278 additions and 53 deletions

View File

@@ -362,29 +362,7 @@ pub static TSQL_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![
LexMatcher::regex_lexer(
"atsign",
r#"[@][a-zA-Z0-9_]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold,
})
},
None,
None,
None,
None,
None,
None,
None,
None,
|_| true,
None,
),
LexMatcher::regex_lexer(
"var_prefix",
r#"[$][a-zA-Z0-9_]+"#,
r#"[@][a-zA-Z0-9_@$#]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
@@ -450,7 +428,7 @@ pub static TSQL_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![
LexMatcher::regex_lexer(
"hash_prefix",
r#"[#][#]?[a-zA-Z0-9_]+"#,
r#"[#][#]?[a-zA-Z0-9_@$#]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::code_token(raw, pos_marker, TokenConfig {
@@ -1148,7 +1126,7 @@ pub static TSQL_LEXERS: Lazy<Vec<LexMatcher>> = Lazy::new(|| { vec![
LexMatcher::regex_lexer(
"word",
r#"[0-9a-zA-Z_#@\p{L}]+"#,
r#"[0-9a-zA-Z_#@$\p{L}]+"#,
|raw, pos_marker, class_types, instance_types, trim_start, trim_chars,
quoted_value, escape_replacement, casefold| {
Token::word_token(raw, pos_marker, TokenConfig {

View File

@@ -164,16 +164,16 @@ tsql_dialect.sets("serde_method").update(
tsql_dialect.insert_lexer_matchers(
[
# According to Microsoft spec, subsequent characters in identifiers can include
# @, $, #, _ in addition to letters and numbers
# https://learn.microsoft.com/en-us/sql/relational-databases/databases/database-identifiers
RegexLexer(
"atsign",
r"[@][a-zA-Z0-9_]+",
CodeSegment,
),
RegexLexer(
"var_prefix",
r"[$][a-zA-Z0-9_]+",
r"[@][a-zA-Z0-9_@$#]+",
CodeSegment,
),
# Note: $ can only appear in subsequent positions of an identifier, not as a prefix.
# $ACTION is handled separately by the ActionParameterSegment parser.
RegexLexer(
"square_quote",
r"\[([^\[\]]*)*\]",
@@ -193,7 +193,7 @@ tsql_dialect.insert_lexer_matchers(
),
RegexLexer(
"hash_prefix",
r"[#][#]?[a-zA-Z0-9_]+",
r"[#][#]?[a-zA-Z0-9_@$#]+",
CodeSegment,
),
RegexLexer(
@@ -272,9 +272,12 @@ tsql_dialect.patch_lexer_matchers(
WhitespaceSegment,
),
),
RegexLexer(
"word", r"[0-9a-zA-Z_#@\p{L}]+", WordSegment
), # overriding to allow hash mark and at-sign in code
# Patch the word lexer so that @, $ and # are accepted inside identifiers.
# According to the Microsoft spec, these characters may appear in any
# subsequent position of an identifier; $ cannot be the first character
# (the first must be a letter, _, @, or #). Note that the regex below is a
# single character class, so it does not itself enforce the first-character rule.
# The special prefix lexers (atsign, hash_prefix) will match first for
# @- and #-prefixed identifiers, which have semantic meaning (variables,
# temp tables).
RegexLexer("word", r"[0-9a-zA-Z_#@$\p{L}]+", WordSegment),
]
)
@@ -296,12 +299,6 @@ tsql_dialect.add(
type="hash_identifier",
casefold=str.upper,
),
VariableIdentifierSegment=TypedParser(
"var_prefix",
IdentifierSegment,
type="variable_identifier",
casefold=str.upper,
),
BatchDelimiterGrammar=Ref("GoStatementSegment"),
QuotedLiteralSegmentWithN=TypedParser(
"single_quote_with_n", LiteralSegment, type="quoted_literal"
@@ -490,7 +487,6 @@ tsql_dialect.replace(
Ref("BracketedIdentifierSegment"),
Ref("HashIdentifierSegment"),
Ref("ParameterNameSegment"),
Ref("VariableIdentifierSegment"),
),
NumericLiteralSegment=OneOf(
# Try integer first, then fallback to the original numeric
@@ -516,7 +512,9 @@ tsql_dialect.replace(
Ref("SystemVariableSegment"),
],
),
ParameterNameSegment=RegexParser(r"@[A-Za-z0-9_]+", CodeSegment, type="parameter"),
ParameterNameSegment=RegexParser(
r"@(?!@)[A-Za-z0-9_@$#]+", CodeSegment, type="parameter"
),
FunctionParameterGrammar=Sequence(
Ref("ParameterNameSegment", optional=True),
Sequence("AS", optional=True),
@@ -4901,11 +4899,7 @@ class TransactionStatementSegment(BaseSegment):
Sequence(
OneOf("COMMIT", "ROLLBACK"),
Ref("TransactionGrammar", optional=True),
OneOf(
Ref("SingleIdentifierGrammar"),
Ref("VariableIdentifierSegment"),
optional=True,
),
Ref("SingleIdentifierGrammar", optional=True),
),
Sequence(
OneOf("COMMIT", "ROLLBACK"),
@@ -4914,11 +4908,7 @@ class TransactionStatementSegment(BaseSegment):
Sequence(
"SAVE",
Ref("TransactionGrammar"),
OneOf(
Ref("SingleIdentifierGrammar"),
Ref("VariableIdentifierSegment"),
optional=True,
),
Ref("SingleIdentifierGrammar", optional=True),
),
)

View File

@@ -0,0 +1,30 @@
-- Test special characters in T-SQL identifiers
-- According to Microsoft spec:
-- https://learn.microsoft.com/en-us/sql/relational-databases/databases/database-identifiers
--
-- First character: letter, underscore, @, or #
-- Subsequent characters: letters, numbers, @, $, #, underscore
-- Variables with @ prefix can have $, #, _ in subsequent positions
DECLARE @variable INT = 1;
DECLARE @$variable INT = 2;
DECLARE @#variable INT = 3;
DECLARE @_variable INT = 4;
DECLARE @var$test INT = 5;
DECLARE @var#test INT = 6;
DECLARE @var_test INT = 7;
-- Temp tables with # prefix can have @, $, _ in subsequent positions
CREATE TABLE #temp (id INT);
CREATE TABLE #$temp (id INT);
CREATE TABLE #@temp (id INT);
CREATE TABLE #_temp (id INT);
CREATE TABLE ##global (id INT);
-- Regular identifiers can have @, $, # in subsequent positions
CREATE TABLE Table$name (Column@name INT, Column#test INT, Column_test INT);
-- Using these identifiers in queries
SELECT @variable, @$variable, @#variable;
SELECT * FROM Table$name;
SELECT Column@name, Column#test FROM Table$name;

View File

@@ -0,0 +1,227 @@
# YML test files are auto-generated from SQL files and should not be edited by
# hand. To help enforce this, the "hash" field in the file must match a hash
# computed by SQLFluff when running the tests. Please run
# `python test/generate_parse_fixture_yml.py` to generate them after adding or
# altering SQL files.
_hash: cbb77432a95f03a12ba5dda9a81d5071c31de27432bf9ce05df6e042e73ff2b3
file:
batch:
- statement:
declare_segment:
keyword: DECLARE
parameter: '@variable'
data_type:
keyword: INT
comparison_operator:
raw_comparison_operator: '='
expression:
integer_literal: '1'
- statement_terminator: ;
- statement:
declare_segment:
keyword: DECLARE
parameter: '@$variable'
data_type:
keyword: INT
comparison_operator:
raw_comparison_operator: '='
expression:
integer_literal: '2'
- statement_terminator: ;
- statement:
declare_segment:
keyword: DECLARE
parameter: '@#variable'
data_type:
keyword: INT
comparison_operator:
raw_comparison_operator: '='
expression:
integer_literal: '3'
- statement_terminator: ;
- statement:
declare_segment:
keyword: DECLARE
parameter: '@_variable'
data_type:
keyword: INT
comparison_operator:
raw_comparison_operator: '='
expression:
integer_literal: '4'
- statement_terminator: ;
- statement:
declare_segment:
keyword: DECLARE
parameter: '@var$test'
data_type:
keyword: INT
comparison_operator:
raw_comparison_operator: '='
expression:
integer_literal: '5'
- statement_terminator: ;
- statement:
declare_segment:
keyword: DECLARE
parameter: '@var#test'
data_type:
keyword: INT
comparison_operator:
raw_comparison_operator: '='
expression:
integer_literal: '6'
- statement_terminator: ;
- statement:
declare_segment:
keyword: DECLARE
parameter: '@var_test'
data_type:
keyword: INT
comparison_operator:
raw_comparison_operator: '='
expression:
integer_literal: '7'
- statement_terminator: ;
- statement:
create_table_statement:
- keyword: CREATE
- keyword: TABLE
- table_reference:
hash_identifier: '#temp'
- bracketed:
start_bracket: (
column_definition:
naked_identifier: id
data_type:
keyword: INT
end_bracket: )
- statement_terminator: ;
- statement:
create_table_statement:
- keyword: CREATE
- keyword: TABLE
- table_reference:
hash_identifier: '#$temp'
- bracketed:
start_bracket: (
column_definition:
naked_identifier: id
data_type:
keyword: INT
end_bracket: )
- statement_terminator: ;
- statement:
create_table_statement:
- keyword: CREATE
- keyword: TABLE
- table_reference:
hash_identifier: '#@temp'
- bracketed:
start_bracket: (
column_definition:
naked_identifier: id
data_type:
keyword: INT
end_bracket: )
- statement_terminator: ;
- statement:
create_table_statement:
- keyword: CREATE
- keyword: TABLE
- table_reference:
hash_identifier: '#_temp'
- bracketed:
start_bracket: (
column_definition:
naked_identifier: id
data_type:
keyword: INT
end_bracket: )
- statement_terminator: ;
- statement:
create_table_statement:
- keyword: CREATE
- keyword: TABLE
- table_reference:
hash_identifier: '##global'
- bracketed:
start_bracket: (
column_definition:
naked_identifier: id
data_type:
keyword: INT
end_bracket: )
- statement_terminator: ;
- statement:
create_table_statement:
- keyword: CREATE
- keyword: TABLE
- table_reference:
naked_identifier: Table$name
- bracketed:
- start_bracket: (
- column_definition:
naked_identifier: Column@name
data_type:
keyword: INT
- comma: ','
- column_definition:
naked_identifier: Column#test
data_type:
keyword: INT
- comma: ','
- column_definition:
naked_identifier: Column_test
data_type:
keyword: INT
- end_bracket: )
- statement_terminator: ;
- statement:
select_statement:
select_clause:
- keyword: SELECT
- select_clause_element:
parameter: '@variable'
- comma: ','
- select_clause_element:
parameter: '@$variable'
- comma: ','
- select_clause_element:
parameter: '@#variable'
- statement_terminator: ;
- statement:
select_statement:
select_clause:
keyword: SELECT
select_clause_element:
wildcard_expression:
wildcard_identifier:
star: '*'
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
naked_identifier: Table$name
- statement_terminator: ;
- statement:
select_statement:
select_clause:
- keyword: SELECT
- select_clause_element:
column_reference:
naked_identifier: Column@name
- comma: ','
- select_clause_element:
column_reference:
naked_identifier: Column#test
from_clause:
keyword: FROM
from_expression:
from_expression_element:
table_expression:
table_reference:
naked_identifier: Table$name
- statement_terminator: ;