diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs
index 20800c1d3..2340df611 100644
--- a/src/dialect/hive.rs
+++ b/src/dialect/hive.rs
@@ -38,4 +38,8 @@ impl Dialect for HiveDialect {
     fn supports_filter_during_aggregation(&self) -> bool {
         true
     }
+
+    fn supports_numeric_prefix(&self) -> bool {
+        true
+    }
 }
diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index d3257aba3..a04390570 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -185,6 +185,11 @@ pub trait Dialect: Debug + Any {
     fn supports_named_fn_args_with_eq_operator(&self) -> bool {
         false
     }
+    /// Returns true if the dialect supports identifiers starting with a numeric
+    /// prefix such as tables named: `59901_user_login`
+    fn supports_numeric_prefix(&self) -> bool {
+        false
+    }
     /// Returns true if the dialects supports specifying null treatment
     /// as part of a window function's parameter list. As opposed
     /// to after the parameter list.
diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs
index f7711b2b0..32525658b 100644
--- a/src/dialect/mysql.rs
+++ b/src/dialect/mysql.rs
@@ -53,6 +53,10 @@ impl Dialect for MySqlDialect {
         true
     }
 
+    fn supports_numeric_prefix(&self) -> bool {
+        true
+    }
+
     fn parse_infix(
         &self,
         parser: &mut crate::parser::Parser,
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 8ebc2dd0c..8e3177e9f 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -35,11 +35,10 @@ use serde::{Deserialize, Serialize};
 use sqlparser_derive::{Visit, VisitMut};
 
 use crate::ast::DollarQuotedString;
+use crate::dialect::Dialect;
 use crate::dialect::{
-    BigQueryDialect, DuckDbDialect, GenericDialect, HiveDialect, PostgreSqlDialect,
-    SnowflakeDialect,
+    BigQueryDialect, DuckDbDialect, GenericDialect, PostgreSqlDialect, SnowflakeDialect,
 };
-use crate::dialect::{Dialect, MySqlDialect};
 use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
 
 /// SQL Token enumeration
@@ -821,7 +820,7 @@ impl<'a> Tokenizer<'a> {
 
-                    // mysql dialect supports identifiers that start with a numeric prefix,
-                    // as long as they aren't an exponent number.
-                    if dialect_of!(self is MySqlDialect | HiveDialect) && exponent_part.is_empty() {
+                    // Some dialects (e.g. MySQL, Hive) support identifiers that start with a
+                    // numeric prefix, as long as they aren't an exponent number.
+                    if self.dialect.supports_numeric_prefix() && exponent_part.is_empty() {
                         let word =
                             peeking_take_while(chars, |ch| self.dialect.is_identifier_part(ch));
 
@@ -1544,7 +1543,10 @@ impl<'a: 'b, 'b> Unescape<'a, 'b> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::dialect::{BigQueryDialect, ClickHouseDialect, MsSqlDialect};
+    use crate::dialect::{
+        BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect,
+    };
+    use core::fmt::Debug;
 
     #[test]
     fn tokenizer_error_impl() {
@@ -2414,6 +2416,63 @@ mod tests {
         check_unescape(r"Hello\xCADRust", None);
     }
 
+    #[test]
+    fn tokenize_numeric_prefix_trait() {
+        // Minimal dialect that opts in through the trait method only, so the
+        // tokenizer cannot be relying on a concrete dialect type.
+        #[derive(Debug)]
+        struct NumericPrefixDialect;
+
+        impl Dialect for NumericPrefixDialect {
+            fn is_identifier_start(&self, ch: char) -> bool {
+                ch.is_ascii_lowercase()
+                    || ch.is_ascii_uppercase()
+                    || ch.is_ascii_digit()
+                    || ch == '$'
+            }
+
+            fn is_identifier_part(&self, ch: char) -> bool {
+                ch.is_ascii_lowercase()
+                    || ch.is_ascii_uppercase()
+                    || ch.is_ascii_digit()
+                    || ch == '_'
+                    || ch == '$'
+                    || ch == '{'
+                    || ch == '}'
+            }
+
+            fn supports_numeric_prefix(&self) -> bool {
+                true
+            }
+        }
+
+        tokenize_numeric_prefix_inner(&NumericPrefixDialect {});
+        tokenize_numeric_prefix_inner(&HiveDialect {});
+        tokenize_numeric_prefix_inner(&MySqlDialect {});
+    }
+
+    /// Checks that `dialect` keeps a bare number as a number and tokenizes a
+    /// digit-prefixed identifier as a single word.
+    fn tokenize_numeric_prefix_inner(dialect: &dyn Dialect) {
+        let cases = [
+            ("SELECT * FROM 1", Token::Number(String::from("1"), false)),
+            ("SELECT * FROM 59901_user_login", Token::make_word("59901_user_login", None)),
+        ];
+        for (sql, last_token) in cases {
+            let tokens = Tokenizer::new(dialect, sql).tokenize().unwrap();
+            let expected = vec![
+                Token::make_keyword("SELECT"),
+                Token::Whitespace(Whitespace::Space),
+                Token::Mul,
+                Token::Whitespace(Whitespace::Space),
+                Token::make_keyword("FROM"),
+                Token::Whitespace(Whitespace::Space),
+                last_token,
+            ];
+            compare(expected, tokens);
+        }
+    }
+
     #[test]
     fn tokenize_quoted_string_escape() {
         for (sql, expected, expected_unescaped) in [