Skip to content

Commit d8a55e5

Browse files
Make Parser & Tokenizer generic over Dialect
* rustc monomorphizes generics, which is more efficient than trait object methods, which are dispatched dynamically
* the Parser struct no longer has a lifetime, making it more convenient overall (e.g. it is now Send, Sync & UnwindSafe)
* to keep the tests working, a Parse trait is introduced (only within the tests; it is not part of the public API)
1 parent c08817d commit d8a55e5

25 files changed

+218
-276
lines changed

examples/cli.rs

+16-10
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,24 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname]
3636
"#,
3737
);
3838

39-
let dialect: Box<dyn Dialect> = match std::env::args().nth(2).unwrap_or_default().as_ref() {
40-
"--ansi" => Box::new(AnsiDialect {}),
41-
"--postgres" => Box::new(PostgreSqlDialect {}),
42-
"--ms" => Box::new(MsSqlDialect {}),
43-
"--mysql" => Box::new(MySqlDialect {}),
44-
"--snowflake" => Box::new(SnowflakeDialect {}),
45-
"--hive" => Box::new(HiveDialect {}),
46-
"--generic" | "" => Box::new(GenericDialect {}),
39+
match std::env::args().nth(2).unwrap_or_default().as_ref() {
40+
"--ansi" => parse::<AnsiDialect>(filename),
41+
"--postgres" => parse::<PostgreSqlDialect>(filename),
42+
"--ms" => parse::<MsSqlDialect>(filename),
43+
"--mysql" => parse::<MySqlDialect>(filename),
44+
"--snowflake" => parse::<SnowflakeDialect>(filename),
45+
"--hive" => parse::<HiveDialect>(filename),
46+
"--generic" | "" => parse::<GenericDialect>(filename),
4747
s => panic!("Unexpected parameter: {}", s),
4848
};
49+
}
4950

50-
println!("Parsing from file '{}' using {:?}", &filename, dialect);
51+
fn parse<D: Dialect>(filename: String) {
52+
println!(
53+
"Parsing from file '{}' using {:?}",
54+
&filename,
55+
std::any::type_name::<D>()
56+
);
5157
let contents = fs::read_to_string(&filename)
5258
.unwrap_or_else(|_| panic!("Unable to read the file {}", &filename));
5359
let without_bom = if contents.chars().next().unwrap() as u64 != 0xfeff {
@@ -57,7 +63,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname]
5763
chars.next();
5864
chars.as_str()
5965
};
60-
let parse_result = Parser::parse_sql(&*dialect, without_bom);
66+
let parse_result = Parser::<D>::parse_sql(without_bom);
6167
match parse_result {
6268
Ok(statements) => {
6369
println!(

examples/parse_select.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,7 @@ fn main() {
2121
WHERE a > b AND b < 100 \
2222
ORDER BY a DESC, b";
2323

24-
let dialect = GenericDialect {};
25-
26-
let ast = Parser::parse_sql(&dialect, sql).unwrap();
24+
let ast = Parser::<GenericDialect>::parse_sql(sql).unwrap();
2725

2826
println!("AST: {:?}", ast);
2927
}

src/dialect/ansi.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ use crate::dialect::Dialect;
1616
pub struct AnsiDialect {}
1717

1818
impl Dialect for AnsiDialect {
19-
fn is_identifier_start(&self, ch: char) -> bool {
19+
fn is_identifier_start(ch: char) -> bool {
2020
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch)
2121
}
2222

23-
fn is_identifier_part(&self, ch: char) -> bool {
23+
fn is_identifier_part(ch: char) -> bool {
2424
('a'..='z').contains(&ch)
2525
|| ('A'..='Z').contains(&ch)
2626
|| ('0'..='9').contains(&ch)

src/dialect/clickhouse.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ use crate::dialect::Dialect;
1616
pub struct ClickHouseDialect {}
1717

1818
impl Dialect for ClickHouseDialect {
19-
fn is_identifier_start(&self, ch: char) -> bool {
19+
fn is_identifier_start(ch: char) -> bool {
2020
// See https://clickhouse.com/docs/en/sql-reference/syntax/#syntax-identifiers
2121
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
2222
}
2323

24-
fn is_identifier_part(&self, ch: char) -> bool {
25-
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
24+
fn is_identifier_part(ch: char) -> bool {
25+
Self::is_identifier_start(ch) || ('0'..='9').contains(&ch)
2626
}
2727
}

src/dialect/generic.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,15 +16,15 @@ use crate::dialect::Dialect;
1616
pub struct GenericDialect;
1717

1818
impl Dialect for GenericDialect {
19-
fn is_identifier_start(&self, ch: char) -> bool {
19+
fn is_identifier_start(ch: char) -> bool {
2020
('a'..='z').contains(&ch)
2121
|| ('A'..='Z').contains(&ch)
2222
|| ch == '_'
2323
|| ch == '#'
2424
|| ch == '@'
2525
}
2626

27-
fn is_identifier_part(&self, ch: char) -> bool {
27+
fn is_identifier_part(ch: char) -> bool {
2828
('a'..='z').contains(&ch)
2929
|| ('A'..='Z').contains(&ch)
3030
|| ('0'..='9').contains(&ch)

src/dialect/hive.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,18 @@ use crate::dialect::Dialect;
1616
pub struct HiveDialect {}
1717

1818
impl Dialect for HiveDialect {
19-
fn is_delimited_identifier_start(&self, ch: char) -> bool {
19+
fn is_delimited_identifier_start(ch: char) -> bool {
2020
(ch == '"') || (ch == '`')
2121
}
2222

23-
fn is_identifier_start(&self, ch: char) -> bool {
23+
fn is_identifier_start(ch: char) -> bool {
2424
('a'..='z').contains(&ch)
2525
|| ('A'..='Z').contains(&ch)
2626
|| ('0'..='9').contains(&ch)
2727
|| ch == '$'
2828
}
2929

30-
fn is_identifier_part(&self, ch: char) -> bool {
30+
fn is_identifier_part(ch: char) -> bool {
3131
('a'..='z').contains(&ch)
3232
|| ('A'..='Z').contains(&ch)
3333
|| ('0'..='9').contains(&ch)

src/dialect/mod.rs

+5-47
Original file line numberDiff line numberDiff line change
@@ -34,63 +34,21 @@ pub use self::snowflake::SnowflakeDialect;
3434
pub use self::sqlite::SQLiteDialect;
3535
pub use crate::keywords;
3636

37-
/// `dialect_of!(parser is SQLiteDialect | GenericDialect)` evaluates
38-
/// to `true` if `parser.dialect` is one of the `Dialect`s specified.
39-
macro_rules! dialect_of {
40-
( $parsed_dialect: ident is $($dialect_type: ty)|+ ) => {
41-
($($parsed_dialect.dialect.is::<$dialect_type>())||+)
42-
};
43-
}
44-
4537
pub trait Dialect: Debug + Any {
4638
/// Determine if a character starts a quoted identifier. The default
4739
/// implementation, accepting "double quoted" ids is both ANSI-compliant
4840
/// and appropriate for most dialects (with the notable exception of
4941
/// MySQL, MS SQL, and sqlite). You can accept one of the characters listed
5042
/// in `Word::matching_end_quote` here
51-
fn is_delimited_identifier_start(&self, ch: char) -> bool {
43+
fn is_delimited_identifier_start(ch: char) -> bool {
5244
ch == '"'
5345
}
5446
/// Determine if a character is a valid start character for an unquoted identifier
55-
fn is_identifier_start(&self, ch: char) -> bool;
47+
fn is_identifier_start(ch: char) -> bool;
5648
/// Determine if a character is a valid unquoted identifier character
57-
fn is_identifier_part(&self, ch: char) -> bool;
58-
}
59-
60-
impl dyn Dialect {
61-
#[inline]
62-
pub fn is<T: Dialect>(&self) -> bool {
63-
// borrowed from `Any` implementation
64-
TypeId::of::<T>() == self.type_id()
65-
}
66-
}
67-
68-
#[cfg(test)]
69-
mod tests {
70-
use super::ansi::AnsiDialect;
71-
use super::generic::GenericDialect;
72-
use super::*;
73-
74-
struct DialectHolder<'a> {
75-
dialect: &'a dyn Dialect,
76-
}
77-
78-
#[test]
79-
fn test_is_dialect() {
80-
let generic_dialect: &dyn Dialect = &GenericDialect {};
81-
let ansi_dialect: &dyn Dialect = &AnsiDialect {};
82-
83-
let generic_holder = DialectHolder {
84-
dialect: generic_dialect,
85-
};
86-
let ansi_holder = DialectHolder {
87-
dialect: ansi_dialect,
88-
};
49+
fn is_identifier_part(ch: char) -> bool;
8950

90-
assert!(dialect_of!(generic_holder is GenericDialect | AnsiDialect),);
91-
assert!(!dialect_of!(generic_holder is AnsiDialect));
92-
assert!(dialect_of!(ansi_holder is AnsiDialect));
93-
assert!(dialect_of!(ansi_holder is GenericDialect | AnsiDialect));
94-
assert!(!dialect_of!(ansi_holder is GenericDialect | MsSqlDialect));
51+
fn is<T: Dialect>() -> bool {
52+
TypeId::of::<Self>() == TypeId::of::<T>()
9553
}
9654
}

src/dialect/mssql.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@ use crate::dialect::Dialect;
1616
pub struct MsSqlDialect {}
1717

1818
impl Dialect for MsSqlDialect {
19-
fn is_delimited_identifier_start(&self, ch: char) -> bool {
19+
fn is_delimited_identifier_start(ch: char) -> bool {
2020
ch == '"' || ch == '['
2121
}
2222

23-
fn is_identifier_start(&self, ch: char) -> bool {
23+
fn is_identifier_start(ch: char) -> bool {
2424
// See https://docs.microsoft.com/en-us/sql/relational-databases/databases/database-identifiers?view=sql-server-2017#rules-for-regular-identifiers
2525
// We don't support non-latin "letters" currently.
2626
('a'..='z').contains(&ch)
@@ -30,7 +30,7 @@ impl Dialect for MsSqlDialect {
3030
|| ch == '@'
3131
}
3232

33-
fn is_identifier_part(&self, ch: char) -> bool {
33+
fn is_identifier_part(ch: char) -> bool {
3434
('a'..='z').contains(&ch)
3535
|| ('A'..='Z').contains(&ch)
3636
|| ('0'..='9').contains(&ch)

src/dialect/mysql.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use crate::dialect::Dialect;
1616
pub struct MySqlDialect {}
1717

1818
impl Dialect for MySqlDialect {
19-
fn is_identifier_start(&self, ch: char) -> bool {
19+
fn is_identifier_start(ch: char) -> bool {
2020
// See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html.
2121
// We don't yet support identifiers beginning with numbers, as that
2222
// makes it hard to distinguish numeric literals.
@@ -27,11 +27,11 @@ impl Dialect for MySqlDialect {
2727
|| ('\u{0080}'..='\u{ffff}').contains(&ch)
2828
}
2929

30-
fn is_identifier_part(&self, ch: char) -> bool {
31-
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
30+
fn is_identifier_part(ch: char) -> bool {
31+
Self::is_identifier_start(ch) || ('0'..='9').contains(&ch)
3232
}
3333

34-
fn is_delimited_identifier_start(&self, ch: char) -> bool {
34+
fn is_delimited_identifier_start(ch: char) -> bool {
3535
ch == '`'
3636
}
3737
}

src/dialect/postgresql.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,17 @@
1313
use crate::dialect::Dialect;
1414

1515
#[derive(Debug)]
16-
pub struct PostgreSqlDialect {}
16+
pub struct PostgreSqlDialect;
1717

1818
impl Dialect for PostgreSqlDialect {
19-
fn is_identifier_start(&self, ch: char) -> bool {
19+
fn is_identifier_start(ch: char) -> bool {
2020
// See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
2121
// We don't yet support identifiers beginning with "letters with
2222
// diacritical marks and non-Latin letters"
2323
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
2424
}
2525

26-
fn is_identifier_part(&self, ch: char) -> bool {
26+
fn is_identifier_part(ch: char) -> bool {
2727
('a'..='z').contains(&ch)
2828
|| ('A'..='Z').contains(&ch)
2929
|| ('0'..='9').contains(&ch)

src/dialect/snowflake.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ pub struct SnowflakeDialect;
1717

1818
impl Dialect for SnowflakeDialect {
1919
// see https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html
20-
fn is_identifier_start(&self, ch: char) -> bool {
20+
fn is_identifier_start(ch: char) -> bool {
2121
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
2222
}
2323

24-
fn is_identifier_part(&self, ch: char) -> bool {
24+
fn is_identifier_part(ch: char) -> bool {
2525
('a'..='z').contains(&ch)
2626
|| ('A'..='Z').contains(&ch)
2727
|| ('0'..='9').contains(&ch)

src/dialect/sqlite.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@ impl Dialect for SQLiteDialect {
1919
// see https://www.sqlite.org/lang_keywords.html
2020
// parse `...`, [...] and "..." as identifier
2121
// TODO: depending on the context, support treating '...' as an identifier too.
22-
fn is_delimited_identifier_start(&self, ch: char) -> bool {
22+
fn is_delimited_identifier_start(ch: char) -> bool {
2323
ch == '`' || ch == '"' || ch == '['
2424
}
2525

26-
fn is_identifier_start(&self, ch: char) -> bool {
26+
fn is_identifier_start(ch: char) -> bool {
2727
// See https://www.sqlite.org/draft/tokenreq.html
2828
('a'..='z').contains(&ch)
2929
|| ('A'..='Z').contains(&ch)
@@ -32,7 +32,7 @@ impl Dialect for SQLiteDialect {
3232
|| ('\u{007f}'..='\u{ffff}').contains(&ch)
3333
}
3434

35-
fn is_identifier_part(&self, ch: char) -> bool {
36-
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
35+
fn is_identifier_part(ch: char) -> bool {
36+
Self::is_identifier_start(ch) || ('0'..='9').contains(&ch)
3737
}
3838
}

src/lib.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,12 @@
2121
//! use sqlparser::dialect::GenericDialect;
2222
//! use sqlparser::parser::Parser;
2323
//!
24-
//! let dialect = GenericDialect {}; // or AnsiDialect
25-
//!
2624
//! let sql = "SELECT a, b, 123, myfunc(b) \
2725
//! FROM table_1 \
2826
//! WHERE a > b AND b < 100 \
2927
//! ORDER BY a DESC, b";
3028
//!
31-
//! let ast = Parser::parse_sql(&dialect, sql).unwrap();
29+
//! let ast = Parser::<GenericDialect>::parse_sql(sql).unwrap();
3230
//!
3331
//! println!("AST: {:?}", ast);
3432
//! ```

0 commit comments

Comments
 (0)