Skip to content

Make Parser & Tokenizer generic over Dialect, remove test_utils module #407

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 16 additions & 10 deletions examples/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,18 +36,24 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname]
"#,
);

let dialect: Box<dyn Dialect> = match std::env::args().nth(2).unwrap_or_default().as_ref() {
"--ansi" => Box::new(AnsiDialect {}),
"--postgres" => Box::new(PostgreSqlDialect {}),
"--ms" => Box::new(MsSqlDialect {}),
"--mysql" => Box::new(MySqlDialect {}),
"--snowflake" => Box::new(SnowflakeDialect {}),
"--hive" => Box::new(HiveDialect {}),
"--generic" | "" => Box::new(GenericDialect {}),
match std::env::args().nth(2).unwrap_or_default().as_ref() {
"--ansi" => parse::<AnsiDialect>(filename),
"--postgres" => parse::<PostgreSqlDialect>(filename),
"--ms" => parse::<MsSqlDialect>(filename),
"--mysql" => parse::<MySqlDialect>(filename),
"--snowflake" => parse::<SnowflakeDialect>(filename),
"--hive" => parse::<HiveDialect>(filename),
"--generic" | "" => parse::<GenericDialect>(filename),
s => panic!("Unexpected parameter: {}", s),
};
}

println!("Parsing from file '{}' using {:?}", &filename, dialect);
fn parse<D: Dialect>(filename: String) {
println!(
"Parsing from file '{}' using {:?}",
&filename,
std::any::type_name::<D>()
);
let contents = fs::read_to_string(&filename)
.unwrap_or_else(|_| panic!("Unable to read the file {}", &filename));
let without_bom = if contents.chars().next().unwrap() as u64 != 0xfeff {
Expand All @@ -57,7 +63,7 @@ $ cargo run --feature json_example --example cli FILENAME.sql [--dialectname]
chars.next();
chars.as_str()
};
let parse_result = Parser::parse_sql(&*dialect, without_bom);
let parse_result = Parser::<D>::parse_sql(without_bom);
match parse_result {
Ok(statements) => {
println!(
Expand Down
4 changes: 1 addition & 3 deletions examples/parse_select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ fn main() {
WHERE a > b AND b < 100 \
ORDER BY a DESC, b";

let dialect = GenericDialect {};

let ast = Parser::parse_sql(&dialect, sql).unwrap();
let ast = Parser::<GenericDialect>::parse_sql(sql).unwrap();

println!("AST: {:?}", ast);
}
4 changes: 2 additions & 2 deletions src/dialect/ansi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ use crate::dialect::Dialect;
pub struct AnsiDialect {}

impl Dialect for AnsiDialect {
fn is_identifier_start(&self, ch: char) -> bool {
fn is_identifier_start(ch: char) -> bool {
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch)
}

fn is_identifier_part(&self, ch: char) -> bool {
fn is_identifier_part(ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
Expand Down
6 changes: 3 additions & 3 deletions src/dialect/clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ use crate::dialect::Dialect;
pub struct ClickHouseDialect {}

impl Dialect for ClickHouseDialect {
fn is_identifier_start(&self, ch: char) -> bool {
fn is_identifier_start(ch: char) -> bool {
// See https://clickhouse.com/docs/en/sql-reference/syntax/#syntax-identifiers
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
}

fn is_identifier_part(&self, ch: char) -> bool {
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
fn is_identifier_part(ch: char) -> bool {
Self::is_identifier_start(ch) || ('0'..='9').contains(&ch)
}
}
4 changes: 2 additions & 2 deletions src/dialect/generic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ use crate::dialect::Dialect;
pub struct GenericDialect;

impl Dialect for GenericDialect {
fn is_identifier_start(&self, ch: char) -> bool {
fn is_identifier_start(ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ch == '_'
|| ch == '#'
|| ch == '@'
}

fn is_identifier_part(&self, ch: char) -> bool {
fn is_identifier_part(ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
Expand Down
6 changes: 3 additions & 3 deletions src/dialect/hive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,18 @@ use crate::dialect::Dialect;
pub struct HiveDialect {}

impl Dialect for HiveDialect {
fn is_delimited_identifier_start(&self, ch: char) -> bool {
fn is_delimited_identifier_start(ch: char) -> bool {
(ch == '"') || (ch == '`')
}

fn is_identifier_start(&self, ch: char) -> bool {
fn is_identifier_start(ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
|| ch == '$'
}

fn is_identifier_part(&self, ch: char) -> bool {
fn is_identifier_part(ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
Expand Down
52 changes: 5 additions & 47 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,63 +34,21 @@ pub use self::snowflake::SnowflakeDialect;
pub use self::sqlite::SQLiteDialect;
pub use crate::keywords;

/// `dialect_of!(parser is SQLiteDialect | GenericDialect)` evaluates
/// to `true` if `parser.dialect` is one of the `Dialect`s specified.
macro_rules! dialect_of {
( $parsed_dialect: ident is $($dialect_type: ty)|+ ) => {
($($parsed_dialect.dialect.is::<$dialect_type>())||+)
};
}

pub trait Dialect: Debug + Any {
/// Determine if a character starts a quoted identifier. The default
/// implementation, accepting "double quoted" ids is both ANSI-compliant
/// and appropriate for most dialects (with the notable exception of
/// MySQL, MS SQL, and sqlite). You can accept one of characters listed
/// in `Word::matching_end_quote` here
fn is_delimited_identifier_start(&self, ch: char) -> bool {
fn is_delimited_identifier_start(ch: char) -> bool {
ch == '"'
}
/// Determine if a character is a valid start character for an unquoted identifier
fn is_identifier_start(&self, ch: char) -> bool;
fn is_identifier_start(ch: char) -> bool;
/// Determine if a character is a valid unquoted identifier character
fn is_identifier_part(&self, ch: char) -> bool;
}

impl dyn Dialect {
#[inline]
pub fn is<T: Dialect>(&self) -> bool {
// borrowed from `Any` implementation
TypeId::of::<T>() == self.type_id()
}
}

#[cfg(test)]
mod tests {
use super::ansi::AnsiDialect;
use super::generic::GenericDialect;
use super::*;

struct DialectHolder<'a> {
dialect: &'a dyn Dialect,
}

#[test]
fn test_is_dialect() {
let generic_dialect: &dyn Dialect = &GenericDialect {};
let ansi_dialect: &dyn Dialect = &AnsiDialect {};

let generic_holder = DialectHolder {
dialect: generic_dialect,
};
let ansi_holder = DialectHolder {
dialect: ansi_dialect,
};
fn is_identifier_part(ch: char) -> bool;

assert!(dialect_of!(generic_holder is GenericDialect | AnsiDialect),);
assert!(!dialect_of!(generic_holder is AnsiDialect));
assert!(dialect_of!(ansi_holder is AnsiDialect));
assert!(dialect_of!(ansi_holder is GenericDialect | AnsiDialect));
assert!(!dialect_of!(ansi_holder is GenericDialect | MsSqlDialect));
fn is<T: Dialect>() -> bool {
TypeId::of::<Self>() == TypeId::of::<T>()
}
}
6 changes: 3 additions & 3 deletions src/dialect/mssql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ use crate::dialect::Dialect;
pub struct MsSqlDialect {}

impl Dialect for MsSqlDialect {
fn is_delimited_identifier_start(&self, ch: char) -> bool {
fn is_delimited_identifier_start(ch: char) -> bool {
ch == '"' || ch == '['
}

fn is_identifier_start(&self, ch: char) -> bool {
fn is_identifier_start(ch: char) -> bool {
// See https://docs.microsoft.com/en-us/sql/relational-databases/databases/database-identifiers?view=sql-server-2017#rules-for-regular-identifiers
// We don't support non-latin "letters" currently.
('a'..='z').contains(&ch)
Expand All @@ -30,7 +30,7 @@ impl Dialect for MsSqlDialect {
|| ch == '@'
}

fn is_identifier_part(&self, ch: char) -> bool {
fn is_identifier_part(ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
Expand Down
8 changes: 4 additions & 4 deletions src/dialect/mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::dialect::Dialect;
pub struct MySqlDialect {}

impl Dialect for MySqlDialect {
fn is_identifier_start(&self, ch: char) -> bool {
fn is_identifier_start(ch: char) -> bool {
// See https://dev.mysql.com/doc/refman/8.0/en/identifiers.html.
// We don't yet support identifiers beginning with numbers, as that
// makes it hard to distinguish numeric literals.
Expand All @@ -27,11 +27,11 @@ impl Dialect for MySqlDialect {
|| ('\u{0080}'..='\u{ffff}').contains(&ch)
}

fn is_identifier_part(&self, ch: char) -> bool {
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
fn is_identifier_part(ch: char) -> bool {
Self::is_identifier_start(ch) || ('0'..='9').contains(&ch)
}

fn is_delimited_identifier_start(&self, ch: char) -> bool {
fn is_delimited_identifier_start(ch: char) -> bool {
ch == '`'
}
}
6 changes: 3 additions & 3 deletions src/dialect/postgresql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@
use crate::dialect::Dialect;

#[derive(Debug)]
pub struct PostgreSqlDialect {}
pub struct PostgreSqlDialect;

impl Dialect for PostgreSqlDialect {
fn is_identifier_start(&self, ch: char) -> bool {
fn is_identifier_start(ch: char) -> bool {
// See https://www.postgresql.org/docs/11/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
// We don't yet support identifiers beginning with "letters with
// diacritical marks and non-Latin letters"
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
}

fn is_identifier_part(&self, ch: char) -> bool {
fn is_identifier_part(ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
Expand Down
4 changes: 2 additions & 2 deletions src/dialect/snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ pub struct SnowflakeDialect;

impl Dialect for SnowflakeDialect {
// see https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html
fn is_identifier_start(&self, ch: char) -> bool {
fn is_identifier_start(ch: char) -> bool {
('a'..='z').contains(&ch) || ('A'..='Z').contains(&ch) || ch == '_'
}

fn is_identifier_part(&self, ch: char) -> bool {
fn is_identifier_part(ch: char) -> bool {
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
|| ('0'..='9').contains(&ch)
Expand Down
8 changes: 4 additions & 4 deletions src/dialect/sqlite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ impl Dialect for SQLiteDialect {
// see https://www.sqlite.org/lang_keywords.html
// parse `...`, [...] and "..." as identifier
// TODO: support depending on the context tread '...' as identifier too.
fn is_delimited_identifier_start(&self, ch: char) -> bool {
fn is_delimited_identifier_start(ch: char) -> bool {
ch == '`' || ch == '"' || ch == '['
}

fn is_identifier_start(&self, ch: char) -> bool {
fn is_identifier_start(ch: char) -> bool {
// See https://www.sqlite.org/draft/tokenreq.html
('a'..='z').contains(&ch)
|| ('A'..='Z').contains(&ch)
Expand All @@ -32,7 +32,7 @@ impl Dialect for SQLiteDialect {
|| ('\u{007f}'..='\u{ffff}').contains(&ch)
}

fn is_identifier_part(&self, ch: char) -> bool {
self.is_identifier_start(ch) || ('0'..='9').contains(&ch)
fn is_identifier_part(ch: char) -> bool {
Self::is_identifier_start(ch) || ('0'..='9').contains(&ch)
}
}
10 changes: 1 addition & 9 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,12 @@
//! use sqlparser::dialect::GenericDialect;
//! use sqlparser::parser::Parser;
//!
//! let dialect = GenericDialect {}; // or AnsiDialect
//!
//! let sql = "SELECT a, b, 123, myfunc(b) \
//! FROM table_1 \
//! WHERE a > b AND b < 100 \
//! ORDER BY a DESC, b";
//!
//! let ast = Parser::parse_sql(&dialect, sql).unwrap();
//! let ast = Parser::<GenericDialect>::parse_sql(sql).unwrap();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
//! let ast = Parser::<GenericDialect>::parse_sql(sql).unwrap();
//! // Parse using a generic dialect of SQL.
//! // Other dialects such as `AnsiDialect` are available
//! let ast = Parser::<GenericDialect>::parse_sql(sql).unwrap();

Copy link
Author

@not-my-profile not-my-profile Feb 5, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to improve the documentation in a followup PR. This PR is already big enough IMO.

//!
//! println!("AST: {:?}", ast);
//! ```
Expand All @@ -45,9 +43,3 @@ pub mod dialect;
pub mod keywords;
pub mod parser;
pub mod tokenizer;

#[doc(hidden)]
// This is required to make utilities accessible by both the crate-internal
// unit-tests and by the integration tests <https://stackoverflow.com/a/44541071/1026>
// External users are not supposed to rely on this module.
pub mod test_utils;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like a reasonable change, though it may cause other users of this crate upgrade pain 🤔

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was #[doc(hidden)] and documented that "External users are not supposed to rely on this module." So I really don't see the problem.

Loading