
Commit 34bb424

groobyming authored and alamb committed
Extended dialect trait to support numeric prefixed identifiers (apache#1188)
Co-authored-by: Andrew Lamb <[email protected]>
1 parent 2bf39af commit 34bb424
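A quick usage sketch of the behavior this commit enables (not part of the diff below; it assumes the crate's public Parser::parse_sql entry point and the MySQL dialect, which opts in via supports_numeric_prefix): a table whose name starts with digits now parses as a single identifier.

use sqlparser::dialect::MySqlDialect;
use sqlparser::parser::Parser;

fn main() {
    // `59901_user_login` starts with digits; with `supports_numeric_prefix()`
    // returning true, the tokenizer emits it as one Word token, so the table
    // reference parses instead of failing after the leading number.
    let sql = "SELECT * FROM 59901_user_login";
    let statements = Parser::parse_sql(&MySqlDialect {}, sql).unwrap();
    println!("{statements:?}");
}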

File tree

4 files changed, +68 -5 lines changed


src/dialect/hive.rs

+4
@@ -38,4 +38,8 @@ impl Dialect for HiveDialect {
     fn supports_filter_during_aggregation(&self) -> bool {
         true
     }
+
+    fn supports_numeric_prefix(&self) -> bool {
+        true
+    }
 }

src/dialect/mod.rs

+5
@@ -185,6 +185,11 @@ pub trait Dialect: Debug + Any {
     fn supports_named_fn_args_with_eq_operator(&self) -> bool {
         false
     }
+    /// Returns true if the dialect supports identifiers starting with a numeric
+    /// prefix such as tables named: `59901_user_login`
+    fn supports_numeric_prefix(&self) -> bool {
+        false
+    }
     /// Returns true if the dialects supports specifying null treatment
     /// as part of a window function's parameter list. As opposed
     /// to after the parameter list.

src/dialect/mysql.rs

+4
@@ -53,6 +53,10 @@ impl Dialect for MySqlDialect {
         true
     }
 
+    fn supports_numeric_prefix(&self) -> bool {
+        true
+    }
+
     fn parse_infix(
         &self,
         parser: &mut crate::parser::Parser,

src/tokenizer.rs

+55 -5

@@ -35,11 +35,10 @@ use serde::{Deserialize, Serialize};
 use sqlparser_derive::{Visit, VisitMut};
 
 use crate::ast::DollarQuotedString;
+use crate::dialect::Dialect;
 use crate::dialect::{
-    BigQueryDialect, DuckDbDialect, GenericDialect, HiveDialect, PostgreSqlDialect,
-    SnowflakeDialect,
+    BigQueryDialect, DuckDbDialect, GenericDialect, PostgreSqlDialect, SnowflakeDialect,
 };
-use crate::dialect::{Dialect, MySqlDialect};
 use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
 
 /// SQL Token enumeration
@@ -821,7 +820,7 @@ impl<'a> Tokenizer<'a> {
 
                 // mysql dialect supports identifiers that start with a numeric prefix,
                 // as long as they aren't an exponent number.
-                if dialect_of!(self is MySqlDialect | HiveDialect) && exponent_part.is_empty() {
+                if self.dialect.supports_numeric_prefix() && exponent_part.is_empty() {
                     let word =
                         peeking_take_while(chars, |ch| self.dialect.is_identifier_part(ch));
 
@@ -1544,7 +1543,10 @@ impl<'a: 'b, 'b> Unescape<'a, 'b> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::dialect::{BigQueryDialect, ClickHouseDialect, MsSqlDialect};
+    use crate::dialect::{
+        BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect,
+    };
+    use core::fmt::Debug;
 
     #[test]
     fn tokenizer_error_impl() {
@@ -2414,6 +2416,54 @@ mod tests {
         check_unescape(r"Hello\xCADRust", None);
     }
 
+    #[test]
+    fn tokenize_numeric_prefix_trait() {
+        #[derive(Debug)]
+        struct NumericPrefixDialect;
+
+        impl Dialect for NumericPrefixDialect {
+            fn is_identifier_start(&self, ch: char) -> bool {
+                ch.is_ascii_lowercase()
+                    || ch.is_ascii_uppercase()
+                    || ch.is_ascii_digit()
+                    || ch == '$'
+            }
+
+            fn is_identifier_part(&self, ch: char) -> bool {
+                ch.is_ascii_lowercase()
+                    || ch.is_ascii_uppercase()
+                    || ch.is_ascii_digit()
+                    || ch == '_'
+                    || ch == '$'
+                    || ch == '{'
+                    || ch == '}'
+            }
+
+            fn supports_numeric_prefix(&self) -> bool {
+                true
+            }
+        }
+
+        tokenize_numeric_prefix_inner(&NumericPrefixDialect {});
+        tokenize_numeric_prefix_inner(&HiveDialect {});
+        tokenize_numeric_prefix_inner(&MySqlDialect {});
+    }
+
+    fn tokenize_numeric_prefix_inner(dialect: &dyn Dialect) {
+        let sql = r#"SELECT * FROM 1"#;
+        let tokens = Tokenizer::new(dialect, sql).tokenize().unwrap();
+        let expected = vec![
+            Token::make_keyword("SELECT"),
+            Token::Whitespace(Whitespace::Space),
+            Token::Mul,
+            Token::Whitespace(Whitespace::Space),
+            Token::make_keyword("FROM"),
+            Token::Whitespace(Whitespace::Space),
+            Token::Number(String::from("1"), false),
+        ];
+        compare(expected, tokens);
+    }
+
     #[test]
     fn tokenize_quoted_string_escape() {
         for (sql, expected, expected_unescaped) in [
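A short usage sketch of the tokenizer behavior described by the comment in the hunk above (not part of this commit; it uses the public Tokenizer API exercised by the tests): a digit followed by identifier characters collapses into a single Word token, while exponent literals are left alone because the numeric-prefix path is skipped when the exponent part is non-empty.

use sqlparser::dialect::MySqlDialect;
use sqlparser::tokenizer::Tokenizer;

fn main() {
    let dialect = MySqlDialect {};

    // Exponent numbers keep tokenizing as numbers.
    let number = Tokenizer::new(&dialect, "1e10").tokenize().unwrap();
    println!("{number:?}"); // e.g. [Number("1e10", false)]

    // A numeric prefix followed by identifier characters becomes one Word token.
    let word = Tokenizer::new(&dialect, "1a").tokenize().unwrap();
    println!("{word:?}"); // e.g. [Word(Word { value: "1a", .. })]
}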
