From 43f4c6875fe45aeac2eb89bc8721dcb475bba1bc Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 8 Mar 2019 07:27:27 -0700 Subject: [PATCH] Revert "Rework keyword/identifier parsing" --- src/dialect/ansi_sql.rs | 331 ++++++++++++++++++++++++++++ src/dialect/generic_sql.rs | 14 ++ src/dialect/keywords.rs | 375 +------------------------------- src/dialect/mod.rs | 2 + src/dialect/postgresql.rs | 16 ++ src/sqlast/mod.rs | 41 ++-- src/sqlast/table_key.rs | 8 +- src/sqlparser.rs | 423 ++++++++++++++++++------------------ src/sqltokenizer.rs | 127 ++++------- tests/sqlparser_generic.rs | 115 +++------- tests/sqlparser_postgres.rs | 26 ++- 11 files changed, 691 insertions(+), 787 deletions(-) diff --git a/src/dialect/ansi_sql.rs b/src/dialect/ansi_sql.rs index 4026cf61c..b91fdc6e9 100644 --- a/src/dialect/ansi_sql.rs +++ b/src/dialect/ansi_sql.rs @@ -1,8 +1,339 @@ use dialect::Dialect; +use dialect::keywords::*; + pub struct AnsiSqlDialect {} impl Dialect for AnsiSqlDialect { + fn keywords(&self) -> Vec<&'static str> { + return vec![ + ABS, + ALL, + ALLOCATE, + ALTER, + AND, + ANY, + ARE, + ARRAY, + ARRAY_AGG, + ARRAY_MAX_CARDINALITY, + AS, + ASENSITIVE, + ASYMMETRIC, + AT, + ATOMIC, + AUTHORIZATION, + AVG, + BEGIN, + BEGIN_FRAME, + BEGIN_PARTITION, + BETWEEN, + BIGINT, + BINARY, + BLOB, + BOOLEAN, + BOTH, + BY, + CALL, + CALLED, + CARDINALITY, + CASCADED, + CASE, + CAST, + CEIL, + CEILING, + CHAR, + CHAR_LENGTH, + CHARACTER, + CHARACTER_LENGTH, + CHECK, + CLOB, + CLOSE, + COALESCE, + COLLATE, + COLLECT, + COLUMN, + COMMIT, + CONDITION, + CONNECT, + CONSTRAINT, + CONTAINS, + CONVERT, + CORR, + CORRESPONDING, + COUNT, + COVAR_POP, + COVAR_SAMP, + CREATE, + CROSS, + CUBE, + CUME_DIST, + CURRENT, + CURRENT_CATALOG, + CURRENT_DATE, + CURRENT_DEFAULT_TRANSFORM_GROUP, + CURRENT_PATH, + CURRENT_ROLE, + CURRENT_ROW, + CURRENT_SCHEMA, + CURRENT_TIME, + CURRENT_TIMESTAMP, + CURRENT_TRANSFORM_GROUP_FOR_TYPE, + CURRENT_USER, + CURSOR, + CYCLE, + DATE, + DAY, + DEALLOCATE, + DEC, + DECIMAL, + DECLARE, + DEFAULT, + DELETE, + DENSE_RANK, + DEREF, + DESCRIBE, + DETERMINISTIC, + DISCONNECT, + DISTINCT, + DOUBLE, + DROP, + DYNAMIC, + EACH, + ELEMENT, + ELSE, + END, + END_FRAME, + END_PARTITION, + END_EXEC, + EQUALS, + ESCAPE, + EVERY, + EXCEPT, + EXEC, + EXECUTE, + EXISTS, + EXP, + EXTERNAL, + EXTRACT, + FALSE, + FETCH, + FILTER, + FIRST_VALUE, + FLOAT, + FLOOR, + FOR, + FOREIGN, + FRAME_ROW, + FREE, + FROM, + FULL, + FUNCTION, + FUSION, + GET, + GLOBAL, + GRANT, + GROUP, + GROUPING, + GROUPS, + HAVING, + HOLD, + HOUR, + IDENTITY, + IN, + INDICATOR, + INNER, + INOUT, + INSENSITIVE, + INSERT, + INT, + INTEGER, + INTERSECT, + INTERSECTION, + INTERVAL, + INTO, + IS, + JOIN, + LAG, + LANGUAGE, + LARGE, + LAST_VALUE, + LATERAL, + LEAD, + LEADING, + LEFT, + LIKE, + LIKE_REGEX, + LN, + LOCAL, + LOCALTIME, + LOCALTIMESTAMP, + LOWER, + MATCH, + MAX, + MEMBER, + MERGE, + METHOD, + MIN, + MINUTE, + MOD, + MODIFIES, + MODULE, + MONTH, + MULTISET, + NATIONAL, + NATURAL, + NCHAR, + NCLOB, + NEW, + NO, + NONE, + NORMALIZE, + NOT, + NTH_VALUE, + NTILE, + NULL, + NULLIF, + NUMERIC, + OCTET_LENGTH, + OCCURRENCES_REGEX, + OF, + OFFSET, + OLD, + ON, + ONLY, + OPEN, + OR, + ORDER, + OUT, + OUTER, + OVER, + OVERLAPS, + OVERLAY, + PARAMETER, + PARTITION, + PERCENT, + PERCENT_RANK, + PERCENTILE_CONT, + PERCENTILE_DISC, + PERIOD, + PORTION, + POSITION, + POSITION_REGEX, + POWER, + PRECEDES, + PRECISION, + PREPARE, + PRIMARY, + PROCEDURE, + RANGE, + RANK, + READS, + REAL, + RECURSIVE, + REF, + REFERENCES, + REFERENCING, + REGR_AVGX, + 
REGR_AVGY, + REGR_COUNT, + REGR_INTERCEPT, + REGR_R2, + REGR_SLOPE, + REGR_SXX, + REGR_SXY, + REGR_SYY, + RELEASE, + RESULT, + RETURN, + RETURNS, + REVOKE, + RIGHT, + ROLLBACK, + ROLLUP, + ROW, + ROW_NUMBER, + ROWS, + SAVEPOINT, + SCOPE, + SCROLL, + SEARCH, + SECOND, + SELECT, + SENSITIVE, + SESSION_USER, + SET, + SIMILAR, + SMALLINT, + SOME, + SPECIFIC, + SPECIFICTYPE, + SQL, + SQLEXCEPTION, + SQLSTATE, + SQLWARNING, + SQRT, + START, + STATIC, + STDDEV_POP, + STDDEV_SAMP, + SUBMULTISET, + SUBSTRING, + SUBSTRING_REGEX, + SUCCEEDS, + SUM, + SYMMETRIC, + SYSTEM, + SYSTEM_TIME, + SYSTEM_USER, + TABLE, + TABLESAMPLE, + THEN, + TIME, + TIMESTAMP, + TIMEZONE_HOUR, + TIMEZONE_MINUTE, + TO, + TRAILING, + TRANSLATE, + TRANSLATE_REGEX, + TRANSLATION, + TREAT, + TRIGGER, + TRUNCATE, + TRIM, + TRIM_ARRAY, + TRUE, + UESCAPE, + UNION, + UNIQUE, + UNKNOWN, + UNNEST, + UPDATE, + UPPER, + USER, + USING, + VALUE, + VALUES, + VALUE_OF, + VAR_POP, + VAR_SAMP, + VARBINARY, + VARCHAR, + VARYING, + VERSIONING, + WHEN, + WHENEVER, + WHERE, + WIDTH_BUCKET, + WINDOW, + WITH, + WITHIN, + WITHOUT, + YEAR, + ]; + } + fn is_identifier_start(&self, ch: char) -> bool { (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') } diff --git a/src/dialect/generic_sql.rs b/src/dialect/generic_sql.rs index 54275d69f..0f18b7234 100644 --- a/src/dialect/generic_sql.rs +++ b/src/dialect/generic_sql.rs @@ -1,7 +1,21 @@ use dialect::Dialect; + +use dialect::keywords::*; pub struct GenericSqlDialect {} impl Dialect for GenericSqlDialect { + fn keywords(&self) -> Vec<&'static str> { + return vec![ + SELECT, FROM, WHERE, LIMIT, ORDER, GROUP, BY, HAVING, UNION, ALL, INSERT, INTO, UPDATE, + DELETE, IN, IS, NULL, SET, CREATE, EXTERNAL, TABLE, ASC, DESC, AND, OR, NOT, AS, + STORED, CSV, PARQUET, LOCATION, WITH, WITHOUT, HEADER, ROW, // SQL types + CHAR, CHARACTER, VARYING, LARGE, OBJECT, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT, + REAL, DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC, + BOOLEAN, DATE, TIME, TIMESTAMP, CASE, WHEN, THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, + CROSS, OUTER, INNER, NATURAL, ON, USING, LIKE, + ]; + } + fn is_identifier_start(&self, ch: char) -> bool { (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '@' } diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 1a39fe448..e46837243 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -1,23 +1,12 @@ -///! This module defines -/// 1) a list of constants for every keyword that -/// can appear in SQLWord::keyword: -/// pub const KEYWORD = "KEYWORD" -/// 2) an `ALL_KEYWORDS` array with every keyword in it -/// This is not a list of *reserved* keywords: some of these can be -/// parsed as identifiers if the parser decides so. This means that -/// new keywords can be added here without affecting the parse result. -/// -/// As a matter of fact, most of these keywords are not used at all -/// and could be removed. -/// 3) a `RESERVED_FOR_TABLE_ALIAS` array with keywords reserved in a -/// "table alias" context. - +/// make a listing of keywords +/// with static str and their stringified value macro_rules! 
keyword { ($($ident:ident),*) => { - $(pub const $ident: &'static str = stringify!($ident);)* + $(pub static $ident: &'static str = stringify!($ident);)* } } +/// enumerate all the keywords here for all dialects to support in this project keyword!( ABS, ADD, @@ -363,358 +352,4 @@ keyword!( ); /// special case of a keyword that is an invalid identifier -pub const END_EXEC: &'static str = "END-EXEC"; - -pub const ALL_KEYWORDS: &'static [&'static str] = &[ - ABS, - ADD, - ASC, - ALL, - ALLOCATE, - ALTER, - AND, - ANY, - ARE, - ARRAY, - ARRAY_AGG, - ARRAY_MAX_CARDINALITY, - AS, - ASENSITIVE, - ASYMMETRIC, - AT, - ATOMIC, - AUTHORIZATION, - AVG, - BEGIN, - BEGIN_FRAME, - BEGIN_PARTITION, - BETWEEN, - BIGINT, - BINARY, - BLOB, - BOOLEAN, - BOTH, - BY, - BYTEA, - CALL, - CALLED, - CARDINALITY, - CASCADED, - CASE, - CAST, - CEIL, - CEILING, - CHAR, - CHAR_LENGTH, - CHARACTER, - CHARACTER_LENGTH, - CHECK, - CLOB, - CLOSE, - COALESCE, - COLLATE, - COLLECT, - COLUMN, - COMMIT, - CONDITION, - CONNECT, - CONSTRAINT, - CONTAINS, - CONVERT, - COPY, - CORR, - CORRESPONDING, - COUNT, - COVAR_POP, - COVAR_SAMP, - CREATE, - CROSS, - CSV, - CUBE, - CUME_DIST, - CURRENT, - CURRENT_CATALOG, - CURRENT_DATE, - CURRENT_DEFAULT_TRANSFORM_GROUP, - CURRENT_PATH, - CURRENT_ROLE, - CURRENT_ROW, - CURRENT_SCHEMA, - CURRENT_TIME, - CURRENT_TIMESTAMP, - CURRENT_TRANSFORM_GROUP_FOR_TYPE, - CURRENT_USER, - CURSOR, - CYCLE, - DATE, - DAY, - DEALLOCATE, - DEC, - DECIMAL, - DECLARE, - DEFAULT, - DELETE, - DENSE_RANK, - DEREF, - DESC, - DESCRIBE, - DETERMINISTIC, - DISCONNECT, - DISTINCT, - DOUBLE, - DROP, - DYNAMIC, - EACH, - ELEMENT, - ELSE, - END, - END_FRAME, - END_PARTITION, - EQUALS, - ESCAPE, - EVERY, - EXCEPT, - EXEC, - EXECUTE, - EXISTS, - EXP, - EXTERNAL, - EXTRACT, - FALSE, - FETCH, - FILTER, - FIRST_VALUE, - FLOAT, - FLOOR, - FOR, - FOREIGN, - FRAME_ROW, - FREE, - FROM, - FULL, - FUNCTION, - FUSION, - GET, - GLOBAL, - GRANT, - GROUP, - GROUPING, - GROUPS, - HAVING, - HEADER, - HOLD, - HOUR, - IDENTITY, - IN, - INDICATOR, - INNER, - INOUT, - INSENSITIVE, - INSERT, - INT, - INTEGER, - INTERSECT, - INTERSECTION, - INTERVAL, - INTO, - IS, - JOIN, - KEY, - LAG, - LANGUAGE, - LARGE, - LAST_VALUE, - LATERAL, - LEAD, - LEADING, - LEFT, - LIKE, - LIKE_REGEX, - LIMIT, - LN, - LOCAL, - LOCALTIME, - LOCALTIMESTAMP, - LOCATION, - LOWER, - MATCH, - MAX, - MEMBER, - MERGE, - METHOD, - MIN, - MINUTE, - MOD, - MODIFIES, - MODULE, - MONTH, - MULTISET, - NATIONAL, - NATURAL, - NCHAR, - NCLOB, - NEW, - NO, - NONE, - NORMALIZE, - NOT, - NTH_VALUE, - NTILE, - NULL, - NULLIF, - NUMERIC, - OBJECT, - OCTET_LENGTH, - OCCURRENCES_REGEX, - OF, - OFFSET, - OLD, - ON, - ONLY, - OPEN, - OR, - ORDER, - OUT, - OUTER, - OVER, - OVERLAPS, - OVERLAY, - PARAMETER, - PARTITION, - PARQUET, - PERCENT, - PERCENT_RANK, - PERCENTILE_CONT, - PERCENTILE_DISC, - PERIOD, - PORTION, - POSITION, - POSITION_REGEX, - POWER, - PRECEDES, - PRECISION, - PREPARE, - PRIMARY, - PROCEDURE, - RANGE, - RANK, - READS, - REAL, - RECURSIVE, - REF, - REFERENCES, - REFERENCING, - REGCLASS, - REGR_AVGX, - REGR_AVGY, - REGR_COUNT, - REGR_INTERCEPT, - REGR_R2, - REGR_SLOPE, - REGR_SXX, - REGR_SXY, - REGR_SYY, - RELEASE, - RESULT, - RETURN, - RETURNS, - REVOKE, - RIGHT, - ROLLBACK, - ROLLUP, - ROW, - ROW_NUMBER, - ROWS, - SAVEPOINT, - SCOPE, - SCROLL, - SEARCH, - SECOND, - SELECT, - SENSITIVE, - SESSION_USER, - SET, - SIMILAR, - SMALLINT, - SOME, - SPECIFIC, - SPECIFICTYPE, - SQL, - SQLEXCEPTION, - SQLSTATE, - SQLWARNING, - SQRT, - START, - STATIC, - STDDEV_POP, - STDDEV_SAMP,
- STDIN, - STORED, - SUBMULTISET, - SUBSTRING, - SUBSTRING_REGEX, - SUCCEEDS, - SUM, - SYMMETRIC, - SYSTEM, - SYSTEM_TIME, - SYSTEM_USER, - TABLE, - TABLESAMPLE, - TEXT, - THEN, - TIME, - TIMESTAMP, - TIMEZONE_HOUR, - TIMEZONE_MINUTE, - TO, - TRAILING, - TRANSLATE, - TRANSLATE_REGEX, - TRANSLATION, - TREAT, - TRIGGER, - TRUNCATE, - TRIM, - TRIM_ARRAY, - TRUE, - UESCAPE, - UNION, - UNIQUE, - UNKNOWN, - UNNEST, - UPDATE, - UPPER, - USER, - USING, - UUID, - VALUE, - VALUES, - VALUE_OF, - VAR_POP, - VAR_SAMP, - VARBINARY, - VARCHAR, - VARYING, - VERSIONING, - WHEN, - WHENEVER, - WHERE, - WIDTH_BUCKET, - WINDOW, - WITH, - WITHIN, - WITHOUT, - YEAR, - ZONE, - END_EXEC, -]; - -/// These keywords can't be used as a table alias, so that `FROM table_name alias` -/// can be parsed unambiguously without looking ahead. -pub const RESERVED_FOR_TABLE_ALIAS: &'static [&'static str] = &[ - WHERE, GROUP, ON, // keyword is 'reserved' in most dialects - JOIN, INNER, CROSS, FULL, LEFT, RIGHT, // not reserved in Oracle - NATURAL, USING, // not reserved in Oracle & MSSQL - ORDER, // UNION, EXCEPT, INTERSECT, // TODO add these with tests. -]; +pub static END_EXEC: &'static str = "END-EXEC"; diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 3298a1de3..1a704f000 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -8,6 +8,8 @@ pub use self::generic_sql::GenericSqlDialect; pub use self::postgresql::PostgreSqlDialect; pub trait Dialect { + /// Get a list of keywords for this dialect + fn keywords(&self) -> Vec<&'static str>; /// Determine if a character is a valid identifier start character fn is_identifier_start(&self, ch: char) -> bool; /// Determine if a character is a valid identifier character diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 2b64c1f0a..66cb51c19 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -1,8 +1,24 @@ use dialect::Dialect; +use dialect::keywords::*; + pub struct PostgreSqlDialect {} impl Dialect for PostgreSqlDialect { + fn keywords(&self) -> Vec<&'static str> { + return vec![ + ALTER, ONLY, SELECT, FROM, WHERE, LIMIT, ORDER, GROUP, BY, HAVING, UNION, ALL, INSERT, + INTO, UPDATE, DELETE, IN, IS, NULL, SET, CREATE, EXTERNAL, TABLE, ASC, DESC, AND, OR, + NOT, AS, STORED, CSV, WITH, WITHOUT, ROW, // SQL types + CHAR, CHARACTER, VARYING, LARGE, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT, REAL, + DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC, BOOLEAN, + DATE, TIME, TIMESTAMP, VALUES, DEFAULT, ZONE, REGCLASS, TEXT, BYTEA, TRUE, FALSE, COPY, + STDIN, PRIMARY, KEY, UNIQUE, UUID, ADD, CONSTRAINT, FOREIGN, REFERENCES, CASE, WHEN, + THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, CROSS, OUTER, INNER, NATURAL, ON, USING, + LIKE, + ]; + } + fn is_identifier_start(&self, ch: char) -> bool { (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '@' } diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index dbf4b9d48..54b650a84 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -25,18 +25,15 @@ pub use self::value::Value; pub use self::sql_operator::SQLOperator; -// This could be enhanced to remember the way the identifier was quoted -pub type SQLIdent = String; - /// SQL Abstract Syntax Tree (AST) #[derive(Debug, Clone, PartialEq)] pub enum ASTNode { /// Identifier e.g. table name or column name - SQLIdentifier(SQLIdent), + SQLIdentifier(String), /// Wildcard e.g. `*` SQLWildcard, /// Multi part identifier e.g. 
`myschema.dbo.mytable` - SQLCompoundIdentifier(Vec<SQLIdent>), + SQLCompoundIdentifier(Vec<String>), /// Assignment e.g. `name = 'Fred'` in an UPDATE statement SQLAssignment(SQLAssignment), /// `IS NULL` expression @@ -72,17 +69,12 @@ pub enum ASTNode { results: Vec<ASTNode>, else_result: Option<Box<ASTNode>>, }, - /// A table name or a parenthesized subquery with an optional alias - TableFactor { - relation: Box<ASTNode>, // SQLNested or SQLCompoundIdentifier - alias: Option<SQLIdent>, - }, /// SELECT SQLSelect { /// projection expressions projection: Vec<ASTNode>, /// FROM - relation: Option<Box<ASTNode>>, // TableFactor + relation: Option<Box<ASTNode>>, // JOIN joins: Vec<Join>, /// WHERE @@ -101,7 +93,7 @@ /// TABLE table_name: String, /// COLUMNS - columns: Vec<SQLIdent>, + columns: Vec<String>, /// VALUES (vector of rows to insert) values: Vec<Vec<ASTNode>>, }, { /// TABLE table_name: String, /// COLUMNS - columns: Vec<SQLIdent>, + columns: Vec<String>, /// VALUES a vector of values to be copied values: Vec<Option<String>>, }, @@ -196,13 +188,6 @@ impl ToString for ASTNode { } s + " END" } - ASTNode::TableFactor { relation, alias } => { - if let Some(alias) = alias { - format!("{} AS {}", relation.to_string(), alias) - } else { - relation.to_string() - } - } ASTNode::SQLSelect { projection, relation, @@ -381,21 +366,21 @@ impl ToString for SQLAssignment { #[derive(Debug, Clone, PartialEq)] pub struct SQLOrderByExpr { pub expr: Box<ASTNode>, - pub asc: Option<bool>, + pub asc: bool, } impl SQLOrderByExpr { - pub fn new(expr: Box<ASTNode>, asc: Option<bool>) -> Self { + pub fn new(expr: Box<ASTNode>, asc: bool) -> Self { SQLOrderByExpr { expr, asc } } } impl ToString for SQLOrderByExpr { fn to_string(&self) -> String { - match self.asc { - Some(true) => format!("{} ASC", self.expr.to_string()), - Some(false) => format!("{} DESC", self.expr.to_string()), - None => self.expr.to_string(), + if self.asc { + format!("{} ASC", self.expr.as_ref().to_string()) + } else { + format!("{} DESC", self.expr.as_ref().to_string()) } } } @@ -403,7 +388,7 @@ /// SQL column definition #[derive(Debug, Clone, PartialEq)] pub struct SQLColumnDef { - pub name: SQLIdent, + pub name: String, pub data_type: SQLType, pub is_primary: bool, pub is_unique: bool, @@ -432,7 +417,7 @@ impl ToString for SQLColumnDef { #[derive(Debug, Clone, PartialEq)] pub struct Join { - pub relation: ASTNode, // TableFactor + pub relation: ASTNode, pub join_operator: JoinOperator, } diff --git a/src/sqlast/table_key.rs b/src/sqlast/table_key.rs index f4ff70f4b..9dacc21b3 100644 --- a/src/sqlast/table_key.rs +++ b/src/sqlast/table_key.rs @@ -1,5 +1,3 @@ -use super::SQLIdent; - #[derive(Debug, PartialEq, Clone)] pub enum AlterOperation { AddConstraint(TableKey), @@ -19,8 +17,8 @@ impl ToString for AlterOperation { #[derive(Debug, PartialEq, Clone)] pub struct Key { - pub name: SQLIdent, - pub columns: Vec<SQLIdent>, + pub name: String, + pub columns: Vec<String>, } #[derive(Debug, PartialEq, Clone)] pub enum TableKey { ForeignKey { key: Key, foreign_table: String, - referred_columns: Vec<SQLIdent>, + referred_columns: Vec<String>, }, } diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 33b950de5..42a39b01e 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -14,7 +14,6 @@ //! SQL Parser -use super::dialect::keywords; use super::dialect::Dialect; use super::sqlast::*; use super::sqltokenizer::*; @@ -78,7 +77,9 @@ impl Parser { break; } - expr = self.parse_infix(expr, next_precedence)?; + if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)?
{ + expr = infix_expr; + } } Ok(expr) } @@ -91,7 +92,7 @@ impl Parser { loop { // stop parsing on `NULL` | `NOT NULL` match self.peek_token() { - Some(Token::SQLWord(ref k)) if k.keyword == "NOT" || k.keyword == "NULL" => break, + Some(Token::Keyword(ref k)) if k == "NOT" || k == "NULL" => break, _ => {} } @@ -101,7 +102,9 @@ break; } - expr = self.parse_infix(expr, next_precedence)?; + if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)? { + expr = infix_expr; + } } Ok(expr) } @@ -110,7 +113,7 @@ /// Parse an expression prefix pub fn parse_prefix(&mut self) -> Result<ASTNode, ParserError> { match self.next_token() { Some(t) => match t { - Token::SQLWord(w) => match w.keyword.as_ref() { + Token::Keyword(k) => match k.to_uppercase().as_ref() { "SELECT" => Ok(self.parse_select()?), "CREATE" => Ok(self.parse_create()?), "DELETE" => Ok(self.parse_delete()?), @@ -122,31 +125,38 @@ self.parse_sql_value() } "CASE" => self.parse_case_expression(), - "CAST" => self.parse_cast_expression(), "NOT" => Ok(ASTNode::SQLUnary { operator: SQLOperator::Not, expr: Box::new(self.parse_expr(0)?), }), - _ => match self.peek_token() { - Some(Token::LParen) => self.parse_function(&w.value), - Some(Token::Period) => { - let mut id_parts: Vec<String> = vec![w.value]; - while self.consume_token(&Token::Period) { - match self.next_token() { - Some(Token::SQLWord(w)) => id_parts.push(w.value), - _ => { - return parser_err!(format!( - "Error parsing compound identifier" )); - } - } - } - Ok(ASTNode::SQLCompoundIdentifier(id_parts)) - } - _ => Ok(ASTNode::SQLIdentifier(w.value)), - }, - }, + _ => return parser_err!(format!("No prefix parser for keyword {}", k)), }, Token::Mult => Ok(ASTNode::SQLWildcard), + Token::Identifier(id) => { + if "CAST" == id.to_uppercase() { + self.parse_cast_expression() + } else { + match self.peek_token() { + Some(Token::LParen) => self.parse_function(&id), + Some(Token::Period) => { + let mut id_parts: Vec<String> = vec![id]; + while self.peek_token() == Some(Token::Period) { + self.expect_token(&Token::Period)?; + match self.next_token() { + Some(Token::Identifier(id)) => id_parts.push(id), + _ => { + return parser_err!(format!( + "Error parsing compound identifier" )) + } + } + } + Ok(ASTNode::SQLCompoundIdentifier(id_parts)) + } + _ => Ok(ASTNode::SQLIdentifier(id)), + } + } + } Token::Number(_) | Token::SingleQuotedString(_) => { self.prev_token(); self.parse_sql_value() } @@ -238,36 +248,40 @@ } /// Parse an expression infix (typically an operator) - pub fn parse_infix(&mut self, expr: ASTNode, precedence: u8) -> Result<ASTNode, ParserError> { + pub fn parse_infix( + &mut self, + expr: ASTNode, + precedence: u8, + ) -> Result<Option<ASTNode>, ParserError> { debug!("parsing infix"); match self.next_token() { Some(tok) => match tok { - Token::SQLWord(ref k) if k.keyword == "IS" => { + Token::Keyword(ref k) if k == "IS" => { if self.parse_keywords(vec!["NULL"]) { - Ok(ASTNode::SQLIsNull(Box::new(expr))) + Ok(Some(ASTNode::SQLIsNull(Box::new(expr)))) } else if self.parse_keywords(vec!["NOT", "NULL"]) { - Ok(ASTNode::SQLIsNotNull(Box::new(expr))) + Ok(Some(ASTNode::SQLIsNotNull(Box::new(expr)))) } else { parser_err!("Invalid tokens after IS") } } - Token::SQLWord(ref k) if k.keyword == "NOT" => { + Token::Keyword(ref k) if k == "NOT" => { if self.parse_keywords(vec!["LIKE"]) { - Ok(ASTNode::SQLBinaryExpr { + Ok(Some(ASTNode::SQLBinaryExpr { left: Box::new(expr), op: SQLOperator::NotLike, right: Box::new(self.parse_expr(precedence)?), - }) + })) } else { parser_err!("Invalid tokens after NOT")
} } - Token::DoubleColon => { - let pg_cast = self.parse_pg_cast(expr)?; - Ok(pg_cast) - } - Token::SQLWord(_) - | Token::Eq + Token::Keyword(_) => Ok(Some(ASTNode::SQLBinaryExpr { + left: Box::new(expr), + op: self.to_sql_operator(&tok)?, + right: Box::new(self.parse_expr(precedence)?), + })), + Token::Eq | Token::Neq | Token::Gt | Token::GtEq @@ -277,16 +291,18 @@ impl Parser { | Token::Minus | Token::Mult | Token::Mod - | Token::Div => Ok(ASTNode::SQLBinaryExpr { + | Token::Div => Ok(Some(ASTNode::SQLBinaryExpr { left: Box::new(expr), op: self.to_sql_operator(&tok)?, right: Box::new(self.parse_expr(precedence)?), - }), + })), + Token::DoubleColon => { + let pg_cast = self.parse_pg_cast(expr)?; + Ok(Some(pg_cast)) + } _ => parser_err!(format!("No infix parser for token {:?}", tok)), }, - // This is not supposed to happen, because of the precedence check - // in parse_expr. - None => parser_err!("Unexpected EOF in parse_infix"), + None => Ok(None), } } @@ -304,10 +320,10 @@ impl Parser { &Token::Mult => Ok(SQLOperator::Multiply), &Token::Div => Ok(SQLOperator::Divide), &Token::Mod => Ok(SQLOperator::Modulus), - &Token::SQLWord(ref k) if k.keyword == "AND" => Ok(SQLOperator::And), - &Token::SQLWord(ref k) if k.keyword == "OR" => Ok(SQLOperator::Or), - //&Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(SQLOperator::Not), - &Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(SQLOperator::Like), + &Token::Keyword(ref k) if k == "AND" => Ok(SQLOperator::And), + &Token::Keyword(ref k) if k == "OR" => Ok(SQLOperator::Or), + //&Token::Keyword(ref k) if k == "NOT" => Ok(SQLOperator::Not), + &Token::Keyword(ref k) if k == "LIKE" => Ok(SQLOperator::Like), _ => parser_err!(format!("Unsupported SQL operator {:?}", tok)), } } @@ -326,11 +342,11 @@ impl Parser { debug!("get_precedence() {:?}", tok); match tok { - &Token::SQLWord(ref k) if k.keyword == "OR" => Ok(5), - &Token::SQLWord(ref k) if k.keyword == "AND" => Ok(10), - &Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(15), - &Token::SQLWord(ref k) if k.keyword == "IS" => Ok(15), - &Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(20), + &Token::Keyword(ref k) if k == "OR" => Ok(5), + &Token::Keyword(ref k) if k == "AND" => Ok(10), + &Token::Keyword(ref k) if k == "NOT" => Ok(15), + &Token::Keyword(ref k) if k == "IS" => Ok(15), + &Token::Keyword(ref k) if k == "LIKE" => Ok(20), &Token::Eq | &Token::Lt | &Token::LtEq | &Token::Neq | &Token::Gt | &Token::GtEq => { Ok(20) } @@ -429,9 +445,13 @@ impl Parser { #[must_use] pub fn parse_keyword(&mut self, expected: &'static str) -> bool { match self.peek_token() { - Some(Token::SQLWord(ref k)) if expected.eq_ignore_ascii_case(&k.keyword) => { - self.next_token(); - true + Some(Token::Keyword(k)) => { + if expected.eq_ignore_ascii_case(k.as_str()) { + self.next_token(); + true + } else { + false + } } _ => false, } @@ -502,7 +522,7 @@ impl Parser { let mut columns = vec![]; if self.consume_token(&Token::LParen) { loop { - if let Some(Token::SQLWord(column_name)) = self.next_token() { + if let Some(Token::Identifier(column_name)) = self.next_token() { if let Ok(data_type) = self.parse_data_type() { let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]); let is_unique = self.parse_keyword("UNIQUE"); @@ -525,7 +545,7 @@ impl Parser { Some(Token::Comma) => { self.next_token(); columns.push(SQLColumnDef { - name: column_name.value, + name: column_name, data_type: data_type, allow_null, is_primary, @@ -536,7 +556,7 @@ impl Parser { Some(Token::RParen) => { self.next_token(); columns.push(SQLColumnDef 
{ - name: column_name.value, + name: column_name, data_type: data_type, allow_null, is_primary, @@ -590,16 +610,19 @@ impl Parser { } else if is_unique_key { Ok(TableKey::UniqueKey(key)) } else if is_foreign_key { - self.expect_keyword("REFERENCES")?; - let foreign_table = self.parse_tablename()?; - self.expect_token(&Token::LParen)?; - let referred_columns = self.parse_column_names()?; - self.expect_token(&Token::RParen)?; - Ok(TableKey::ForeignKey { - key, - foreign_table, - referred_columns, - }) + if self.parse_keyword("REFERENCES") { + let foreign_table = self.parse_tablename()?; + self.expect_token(&Token::LParen)?; + let referred_columns = self.parse_column_names()?; + self.expect_token(&Token::RParen)?; + Ok(TableKey::ForeignKey { + key, + foreign_table, + referred_columns, + }) + } else { + parser_err!("Expecting references") + } } else { parser_err!(format!( "Expecting primary key, unique key, or foreign key, found: {:?}", @@ -609,33 +632,39 @@ impl Parser { } pub fn parse_alter(&mut self) -> Result { - self.expect_keyword("TABLE")?; - let _ = self.parse_keyword("ONLY"); - let table_name = self.parse_tablename()?; - let operation: Result = - if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) { - match self.next_token() { - Some(Token::SQLWord(ref id)) => { - let table_key = self.parse_table_key(&id.value)?; - Ok(AlterOperation::AddConstraint(table_key)) - } - _ => { - return parser_err!(format!( - "Expecting identifier, found : {:?}", - self.peek_token() - )); + if self.parse_keyword("TABLE") { + let _ = self.parse_keyword("ONLY"); + let table_name = self.parse_tablename()?; + let operation: Result = + if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) { + match self.next_token() { + Some(Token::Identifier(ref id)) => { + let table_key = self.parse_table_key(id)?; + Ok(AlterOperation::AddConstraint(table_key)) + } + _ => { + return parser_err!(format!( + "Expecting identifier, found : {:?}", + self.peek_token() + )); + } } - } - } else { - return parser_err!(format!( - "Expecting ADD CONSTRAINT, found :{:?}", - self.peek_token() - )); - }; - Ok(ASTNode::SQLAlterTable { - name: table_name, - operation: operation?, - }) + } else { + return parser_err!(format!( + "Expecting ADD CONSTRAINT, found :{:?}", + self.peek_token() + )); + }; + Ok(ASTNode::SQLAlterTable { + name: table_name, + operation: operation?, + }) + } else { + parser_err!(format!( + "Expecting TABLE after ALTER, found {:?}", + self.peek_token() + )) + } } /// Parse a copy statement @@ -688,10 +717,8 @@ impl Parser { return Ok(values); } if let Some(token) = self.next_token() { - if let Token::SQLWord(SQLWord { value: v, .. 
}) = token { - if v == "N" { - values.push(None); - } + if token == Token::Identifier("N".to_string()) { + values.push(None); } } else { continue; } @@ -710,16 +737,11 @@ match self.next_token() { Some(t) => { match t { - Token::SQLWord(k) => match k.keyword.as_ref() { + Token::Keyword(k) => match k.to_uppercase().as_ref() { "TRUE" => Ok(Value::Boolean(true)), "FALSE" => Ok(Value::Boolean(false)), "NULL" => Ok(Value::Null), - _ => { - return parser_err!(format!( - "No value parser for keyword {}", - k.keyword - )); - } + _ => return parser_err!(format!("No value parser for keyword {}", k)), }, //TODO: parse the timestamp here (see parse_timestamp_value()) Token::Number(ref n) if n.contains(".") => match n.parse::<f64>() { @@ -851,7 +873,7 @@ /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) pub fn parse_data_type(&mut self) -> Result<SQLType, ParserError> { match self.next_token() { - Some(Token::SQLWord(k)) => match k.keyword.as_ref() { + Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() { "BOOLEAN" => Ok(SQLType::Boolean), "FLOAT" => Ok(SQLType::Float(self.parse_optional_precision()?)), "REAL" => Ok(SQLType::Real), @@ -936,92 +958,64 @@ let (precision, scale) = self.parse_optional_precision_scale()?; Ok(SQLType::Decimal(precision, scale)) } - _ => { - self.prev_token(); - let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type - Ok(SQLType::Custom(type_name)) - } + _ => parser_err!(format!("Invalid data type '{:?}'", k)), }, - other => parser_err!(format!("Invalid data type: '{:?}'", other)), - } - } - - /// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword) - /// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`, - /// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar` - pub fn parse_optional_alias( - &mut self, - reserved_kwds: &[&str], - ) -> Result<Option<SQLIdent>, ParserError> { - let after_as = self.parse_keyword("AS"); - let maybe_alias = self.next_token(); - match maybe_alias { - // Accept any identifier after `AS` (though many dialects have restrictions on - // keywords that may appear here). If there's no `AS`: don't parse keywords, - // which may start a construct allowed in this position, to be parsed as aliases. - // (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword, - // not an alias.)
- Some(Token::SQLWord(ref w)) - if after_as || !reserved_kwds.contains(&w.keyword.as_str()) => - { - // have to clone here until #![feature(bind_by_move_pattern_guards)] is enabled by default - Ok(Some(w.value.clone())) - } - ref not_an_ident if after_as => parser_err!(format!( - "Expected an identifier after AS, got {:?}", - not_an_ident - )), - Some(_not_an_ident) => { + Some(Token::Identifier(_)) => { self.prev_token(); - Ok(None) // no alias found + let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type + Ok(SQLType::Custom(type_name)) } - None => Ok(None), + other => parser_err!(format!("Invalid data type: '{:?}'", other)), } } - /// Parse one or more identifiers with the specified separator between them pub fn parse_compound_identifier(&mut self, separator: &Token) -> Result<ASTNode, ParserError> { let mut idents = vec![]; let mut expect_identifier = true; loop { let token = &self.next_token(); match token { - Some(Token::SQLWord(s)) if expect_identifier => { - expect_identifier = false; - idents.push(s.to_string()); - } - Some(token) if token == separator && !expect_identifier => { - expect_identifier = true; - continue; - } - _ => { - if token.is_some() { + Some(token) => match token { + Token::Identifier(s) => { + if expect_identifier { + expect_identifier = false; + idents.push(s.to_string()); + } else { + self.prev_token(); + break; + } + } + token if token == separator => { + if expect_identifier { + return parser_err!(format!("Expecting identifier, found {:?}", token)); + } else { + expect_identifier = true; + continue; + } + } + _ => { self.prev_token(); + break; } + }, + None => { + self.prev_token(); break; } } } - if expect_identifier { - parser_err!(format!( - "Expecting identifier, found {:?}", - self.peek_token() - )) - } else { - Ok(ASTNode::SQLCompoundIdentifier(idents)) - } + Ok(ASTNode::SQLCompoundIdentifier(idents)) } pub fn parse_tablename(&mut self) -> Result<String, ParserError> { let identifier = self.parse_compound_identifier(&Token::Period)?; match identifier { - // TODO: should store the compound identifier itself ASTNode::SQLCompoundIdentifier(idents) => Ok(idents.join(".")), other => parser_err!(format!("Expecting compound identifier, found: {:?}", other)), } } - pub fn parse_column_names(&mut self) -> Result<Vec<SQLIdent>, ParserError> { + pub fn parse_column_names(&mut self) -> Result<Vec<String>, ParserError> { let identifier = self.parse_compound_identifier(&Token::Comma)?; match identifier { ASTNode::SQLCompoundIdentifier(idents) => Ok(idents), @@ -1096,7 +1090,7 @@ let projection = self.parse_expr_list()?; let (relation, joins): (Option<Box<ASTNode>>, Vec<Join>) = if self.parse_keyword("FROM") { - let relation = Some(Box::new(self.parse_table_factor()?)); + let relation = Some(Box::new(self.parse_expr(0)?)); let joins = self.parse_joins()?; (relation, joins) } else { @@ -1155,21 +1149,6 @@ } } - /// A table name or a parenthesized subquery, followed by optional `[AS] alias` - pub fn parse_table_factor(&mut self) -> Result<ASTNode, ParserError> { - let relation = if self.consume_token(&Token::LParen) { - self.prev_token(); - self.parse_expr(0)? - } else { - self.parse_compound_identifier(&Token::Period)?
- }; - let alias = self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(ASTNode::TableFactor { - relation: Box::new(relation), - alias, - }) - } - fn parse_join_constraint(&mut self, natural: bool) -> Result { if natural { Ok(JoinConstraint::Natural) @@ -1177,20 +1156,26 @@ impl Parser { let constraint = self.parse_expr(0)?; Ok(JoinConstraint::On(constraint)) } else if self.parse_keyword("USING") { - self.expect_token(&Token::LParen)?; - let attributes = self - .parse_expr_list()? - .into_iter() - .map(|ast_node| match ast_node { - ASTNode::SQLIdentifier(ident) => Ok(ident), - unexpected => { - parser_err!(format!("Expected identifier, found {:?}", unexpected)) - } - }) - .collect::, ParserError>>()?; + if self.consume_token(&Token::LParen) { + let attributes = self + .parse_expr_list()? + .into_iter() + .map(|ast_node| match ast_node { + ASTNode::SQLIdentifier(ident) => Ok(ident), + unexpected => { + parser_err!(format!("Expected identifier, found {:?}", unexpected)) + } + }) + .collect::, ParserError>>()?; - self.expect_token(&Token::RParen)?; - Ok(JoinConstraint::Using(attributes)) + if self.consume_token(&Token::RParen) { + Ok(JoinConstraint::Using(attributes)) + } else { + parser_err!(format!("Expected token ')', found {:?}", self.peek_token())) + } + } else { + parser_err!(format!("Expected token '(', found {:?}", self.peek_token())) + } } else { parser_err!(format!( "Unexpected token after JOIN: {:?}", @@ -1205,7 +1190,7 @@ impl Parser { let natural = match &self.peek_token() { Some(Token::Comma) => { self.next_token(); - let relation = self.parse_table_factor()?; + let relation = self.parse_expr(0)?; let join = Join { relation, join_operator: JoinOperator::Implicit, @@ -1213,10 +1198,10 @@ impl Parser { joins.push(join); continue; } - Some(Token::SQLWord(kw)) if kw.keyword == "CROSS" => { + Some(Token::Keyword(kw)) if kw == "CROSS" => { self.next_token(); self.expect_keyword("JOIN")?; - let relation = self.parse_table_factor()?; + let relation = self.parse_expr(0)?; let join = Join { relation, join_operator: JoinOperator::Cross, @@ -1224,7 +1209,7 @@ impl Parser { joins.push(join); continue; } - Some(Token::SQLWord(kw)) if kw.keyword == "NATURAL" => { + Some(Token::Keyword(kw)) if kw == "NATURAL" => { self.next_token(); true } @@ -1233,49 +1218,49 @@ impl Parser { }; let join = match &self.peek_token() { - Some(Token::SQLWord(kw)) if kw.keyword == "INNER" => { + Some(Token::Keyword(kw)) if kw == "INNER" => { self.next_token(); self.expect_keyword("JOIN")?; Join { - relation: self.parse_table_factor()?, + relation: self.parse_expr(0)?, join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?), } } - Some(Token::SQLWord(kw)) if kw.keyword == "JOIN" => { + Some(Token::Keyword(kw)) if kw == "JOIN" => { self.next_token(); Join { - relation: self.parse_table_factor()?, + relation: self.parse_expr(0)?, join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?), } } - Some(Token::SQLWord(kw)) if kw.keyword == "LEFT" => { + Some(Token::Keyword(kw)) if kw == "LEFT" => { self.next_token(); let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; Join { - relation: self.parse_table_factor()?, + relation: self.parse_expr(0)?, join_operator: JoinOperator::LeftOuter( self.parse_join_constraint(natural)?, ), } } - Some(Token::SQLWord(kw)) if kw.keyword == "RIGHT" => { + Some(Token::Keyword(kw)) if kw == "RIGHT" => { self.next_token(); let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; Join { - relation: 
self.parse_table_factor()?, + relation: self.parse_expr(0)?, join_operator: JoinOperator::RightOuter( self.parse_join_constraint(natural)?, ), } } - Some(Token::SQLWord(kw)) if kw.keyword == "FULL" => { + Some(Token::Keyword(kw)) if kw == "FULL" => { self.next_token(); let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; Join { - relation: self.parse_table_factor()?, + relation: self.parse_expr(0)?, join_operator: JoinOperator::FullOuter( self.parse_join_constraint(natural)?, ), @@ -1336,19 +1321,33 @@ impl Parser { loop { let expr = self.parse_expr(0)?; - let asc = if self.parse_keyword("ASC") { - Some(true) - } else if self.parse_keyword("DESC") { - Some(false) - } else { - None + // look for optional ASC / DESC specifier + let asc = match self.peek_token() { + Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() { + "ASC" => { + self.next_token(); + true + } + "DESC" => { + self.next_token(); + false + } + _ => true, + }, + Some(Token::Comma) => true, + _ => true, }; expr_list.push(SQLOrderByExpr::new(Box::new(expr), asc)); - if let Some(Token::Comma) = self.peek_token() { - self.next_token(); + if let Some(t) = self.peek_token() { + if t == Token::Comma { + self.next_token(); + } else { + break; + } } else { + // EOF break; } } diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs index 0095e5058..504088227 100644 --- a/src/sqltokenizer.rs +++ b/src/sqltokenizer.rs @@ -21,20 +21,23 @@ use std::iter::Peekable; use std::str::Chars; -use super::dialect::keywords::ALL_KEYWORDS; use super::dialect::Dialect; /// SQL Token enumeration #[derive(Debug, Clone, PartialEq)] pub enum Token { - /// A keyword (like SELECT) or an optionally quoted SQL identifier - SQLWord(SQLWord), + /// SQL identifier e.g. table or column name + Identifier(String), + /// SQL keyword e.g. Keyword("SELECT") + Keyword(String), /// Numeric literal Number(String), /// A character that could not be tokenized Char(char), /// Single quoted string: i.e: 'string' SingleQuotedString(String), + /// Double quoted string: i.e: "string" + DoubleQuotedString(String), /// Comma Comma, /// Whitespace (space, tab, etc) @@ -90,10 +93,12 @@ pub enum Token { impl ToString for Token { fn to_string(&self) -> String { match self { - Token::SQLWord(ref w) => w.to_string(), + Token::Identifier(ref id) => id.to_string(), + Token::Keyword(ref k) => k.to_string(), Token::Number(ref n) => n.to_string(), Token::Char(ref c) => c.to_string(), Token::SingleQuotedString(ref s) => format!("'{}'", s), + Token::DoubleQuotedString(ref s) => format!("\"{}\"", s), Token::Comma => ",".to_string(), Token::Whitespace(ws) => ws.to_string(), Token::Eq => "=".to_string(), @@ -123,54 +128,6 @@ impl ToString for Token { } } -impl Token { - pub fn make_keyword(keyword: &str) -> Self { - Token::make_word(keyword, None) - } - pub fn make_word(word: &str, quote_style: Option) -> Self { - let word_uppercase = word.to_uppercase(); - //TODO: need to reintroduce FnvHashSet at some point .. 
iterating over keywords is - // not fast but I want the simplicity for now while I experiment with pluggable - // dialects - let is_keyword = quote_style == None && ALL_KEYWORDS.contains(&word_uppercase.as_str()); - Token::SQLWord(SQLWord { - value: word.to_string(), - quote_style: quote_style, - keyword: if is_keyword { - word_uppercase.to_string() - } else { - "".to_string() - }, - }) - } -} - -/// A keyword (like SELECT) or an optionally quoted SQL identifier -#[derive(Debug, Clone, PartialEq)] -pub struct SQLWord { - /// The value of the token, without the enclosing quotes, and with the - /// escape sequences (if any) processed (TODO: escapes are not handled) - pub value: String, - /// An identifier can be "quoted" (<delimited identifier> in ANSI parlance). - /// The standard and most implementations allow using double quotes for this, - /// but some implementations support other quoting styles as well (e.g. \[MS SQL]) - pub quote_style: Option<char>, - /// If the word was not quoted and it matched one of the known keywords, - /// this will have one of the values from dialect::keywords, otherwise empty - pub keyword: String, -} - -impl ToString for SQLWord { - fn to_string(&self) -> String { - match self.quote_style { - Some('"') => format!("\"{}\"", self.value), - Some('[') => format!("[{}]", self.value), - None => self.value.clone(), - _ => panic!("Unexpected quote_style!"), - } - } -} - #[derive(Debug, Clone, PartialEq)] pub enum Whitespace { Space, @@ -211,6 +168,13 @@ impl<'a> Tokenizer<'a> { } } + fn is_keyword(&self, s: &str) -> bool { + //TODO: need to reintroduce FnvHashSet at some point .. iterating over keywords is + // not fast but I want the simplicity for now while I experiment with pluggable + // dialects + return self.dialect.keywords().contains(&s); + } + /// Tokenize the statement and produce a vector of tokens pub fn tokenize(&mut self) -> Result<Vec<Token>, TokenizerError> { let mut peekable = self.query.chars().peekable(); @@ -225,10 +189,11 @@ } Token::Whitespace(Whitespace::Tab) => self.col += 4, - Token::SQLWord(w) if w.quote_style == None => self.col += w.value.len() as u64, - Token::SQLWord(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2, + Token::Identifier(s) => self.col += s.len() as u64, + Token::Keyword(s) => self.col += s.len() as u64, Token::Number(s) => self.col += s.len() as u64, Token::SingleQuotedString(s) => self.col += s.len() as u64, + Token::DoubleQuotedString(s) => self.col += s.len() as u64, _ => self.col += 1, } @@ -267,12 +232,16 @@ break; } } - Ok(Some(Token::make_word(&s, None))) + let upper_str = s.to_uppercase(); + if self.is_keyword(upper_str.as_str()) { + Ok(Some(Token::Keyword(upper_str))) + } else { + Ok(Some(Token::Identifier(s))) + } } // string '\'' => { //TODO: handle escaped quotes in string - //TODO: handle newlines in string //TODO: handle EOF before terminating quote let mut s = String::new(); chars.next(); // consume @@ -306,7 +275,7 @@ } } } - Ok(Some(Token::make_word(&s, Some('"')))) + Ok(Some(Token::DoubleQuotedString(s))) } // numbers '0'...'9' => { @@ -420,7 +389,7 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::make_keyword("SELECT"), + Token::Keyword(String::from("SELECT")), Token::Whitespace(Whitespace::Space), Token::Number(String::from("1")), ]; @@ -436,9 +405,9 @@ let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::make_keyword("SELECT"), +
Token::Keyword(String::from("SELECT")), Token::Whitespace(Whitespace::Space), - Token::make_word("sqrt", None), + Token::Identifier(String::from("sqrt")), Token::LParen, Token::Number(String::from("1")), Token::RParen, @@ -455,23 +424,23 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::make_keyword("SELECT"), + Token::Keyword(String::from("SELECT")), Token::Whitespace(Whitespace::Space), Token::Mult, Token::Whitespace(Whitespace::Space), - Token::make_keyword("FROM"), + Token::Keyword(String::from("FROM")), Token::Whitespace(Whitespace::Space), - Token::make_word("customer", None), + Token::Identifier(String::from("customer")), Token::Whitespace(Whitespace::Space), - Token::make_keyword("WHERE"), + Token::Keyword(String::from("WHERE")), Token::Whitespace(Whitespace::Space), - Token::make_word("id", None), + Token::Identifier(String::from("id")), Token::Whitespace(Whitespace::Space), Token::Eq, Token::Whitespace(Whitespace::Space), Token::Number(String::from("1")), Token::Whitespace(Whitespace::Space), - Token::make_keyword("LIMIT"), + Token::Keyword(String::from("LIMIT")), Token::Whitespace(Whitespace::Space), Token::Number(String::from("5")), ]; @@ -487,17 +456,17 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::make_keyword("SELECT"), + Token::Keyword(String::from("SELECT")), Token::Whitespace(Whitespace::Space), Token::Mult, Token::Whitespace(Whitespace::Space), - Token::make_keyword("FROM"), + Token::Keyword(String::from("FROM")), Token::Whitespace(Whitespace::Space), - Token::make_word("customer", None), + Token::Identifier(String::from("customer")), Token::Whitespace(Whitespace::Space), - Token::make_keyword("WHERE"), + Token::Keyword(String::from("WHERE")), Token::Whitespace(Whitespace::Space), - Token::make_word("salary", None), + Token::Identifier(String::from("salary")), Token::Whitespace(Whitespace::Space), Token::Neq, Token::Whitespace(Whitespace::Space), @@ -522,7 +491,7 @@ mod tests { Token::Char('ط'), Token::Char('ف'), Token::Char('ى'), - Token::make_word("h", None), + Token::Identifier("h".to_string()), ]; compare(expected, tokens); } @@ -538,20 +507,20 @@ mod tests { let expected = vec![ Token::Whitespace(Whitespace::Newline), Token::Whitespace(Whitespace::Newline), - Token::make_keyword("SELECT"), + Token::Keyword("SELECT".into()), Token::Whitespace(Whitespace::Space), Token::Mult, Token::Whitespace(Whitespace::Space), - Token::make_keyword("FROM"), + Token::Keyword("FROM".into()), Token::Whitespace(Whitespace::Space), - Token::make_keyword("table"), + Token::Keyword("TABLE".into()), Token::Whitespace(Whitespace::Tab), Token::Char('م'), Token::Char('ص'), Token::Char('ط'), Token::Char('ف'), Token::Char('ى'), - Token::make_word("h", None), + Token::Identifier("h".to_string()), ]; compare(expected, tokens); } @@ -564,11 +533,11 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::make_word("a", None), + Token::Identifier(String::from("a")), Token::Whitespace(Whitespace::Space), - Token::make_keyword("IS"), + Token::Keyword("IS".to_string()), Token::Whitespace(Whitespace::Space), - Token::make_keyword("NULL"), + Token::Keyword("NULL".to_string()), ]; compare(expected, tokens); diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index c57e1d53c..5c8679724 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -229,33 +229,27 @@ fn parse_not_like() { #[test] fn parse_select_order_by() { - fn chk(sql: &str) { - match 
verified(&sql) { - ASTNode::SQLSelect { order_by, .. } => { - assert_eq!( - Some(vec![ - SQLOrderByExpr { - expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())), - asc: Some(true), - }, - SQLOrderByExpr { - expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())), - asc: Some(false), - }, - SQLOrderByExpr { - expr: Box::new(ASTNode::SQLIdentifier("id".to_string())), - asc: None, - }, - ]), - order_by - ); - } - _ => assert!(false), + let sql = String::from( + "SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC", + ); + match verified(&sql) { + ASTNode::SQLSelect { order_by, .. } => { + assert_eq!( + Some(vec![ + SQLOrderByExpr { + expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())), + asc: true, + }, + SQLOrderByExpr { + expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())), + asc: false, + }, + ]), + order_by + ); } + _ => assert!(false), } - chk("SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC, id"); - // make sure ORDER is not treated as an alias - chk("SELECT id, fname, lname FROM customer ORDER BY lname ASC, fname DESC, id"); } #[test] @@ -272,11 +266,11 @@ fn parse_select_order_by_limit() { Some(vec![ SQLOrderByExpr { expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())), - asc: Some(true), + asc: true, }, SQLOrderByExpr { expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())), - asc: Some(false), + asc: false, }, ]), order_by @@ -541,10 +535,7 @@ fn parse_implicit_join() { assert_eq!( joins[0], Join { - relation: ASTNode::TableFactor { - relation: Box::new(ASTNode::SQLCompoundIdentifier(vec!["t2".to_string()])), - alias: None, - }, + relation: ASTNode::SQLIdentifier("t2".to_string()), join_operator: JoinOperator::Implicit } ) @@ -563,10 +554,7 @@ fn parse_cross_join() { assert_eq!( joins[0], Join { - relation: ASTNode::TableFactor { - relation: Box::new(ASTNode::SQLCompoundIdentifier(vec!["t2".to_string()])), - alias: None, - }, + relation: ASTNode::SQLIdentifier("t2".to_string()), join_operator: JoinOperator::Cross } ) @@ -579,14 +567,10 @@ fn parse_cross_join() { fn parse_joins_on() { fn join_with_constraint( relation: impl Into, - alias: Option, f: impl Fn(JoinConstraint) -> JoinOperator, ) -> Join { Join { - relation: ASTNode::TableFactor { - relation: Box::new(ASTNode::SQLCompoundIdentifier(vec![relation.into()])), - alias, - }, + relation: ASTNode::SQLIdentifier(relation.into()), join_operator: f(JoinConstraint::On(ASTNode::SQLBinaryExpr { left: Box::new(ASTNode::SQLIdentifier("c1".into())), op: SQLOperator::Eq, @@ -594,35 +578,21 @@ fn parse_joins_on() { })), } } - // Test parsing of aliases - assert_eq!( - joins_from(verified("SELECT * FROM t1 JOIN t2 AS foo ON c1 = c2")), - vec![join_with_constraint( - "t2", - Some("foo".to_string()), - JoinOperator::Inner - )] - ); - parses_to( - "SELECT * FROM t1 JOIN t2 foo ON c1 = c2", - "SELECT * FROM t1 JOIN t2 AS foo ON c1 = c2", - ); - // Test parsing of different join operators assert_eq!( joins_from(verified("SELECT * FROM t1 JOIN t2 ON c1 = c2")), - vec![join_with_constraint("t2", None, JoinOperator::Inner)] + vec![join_with_constraint("t2", JoinOperator::Inner)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 LEFT JOIN t2 ON c1 = c2")), - vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)] + vec![join_with_constraint("t2", JoinOperator::LeftOuter)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 RIGHT JOIN t2 ON c1 = c2")), - vec![join_with_constraint("t2", None, JoinOperator::RightOuter)] + 
vec![join_with_constraint("t2", JoinOperator::RightOuter)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 FULL JOIN t2 ON c1 = c2")), - vec![join_with_constraint("t2", None, JoinOperator::FullOuter)] + vec![join_with_constraint("t2", JoinOperator::FullOuter)] ); } @@ -630,46 +600,29 @@ fn parse_joins_on() { fn parse_joins_using() { fn join_with_constraint( relation: impl Into, - alias: Option, f: impl Fn(JoinConstraint) -> JoinOperator, ) -> Join { Join { - relation: ASTNode::TableFactor { - relation: Box::new(ASTNode::SQLCompoundIdentifier(vec![relation.into()])), - alias, - }, + relation: ASTNode::SQLIdentifier(relation.into()), join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } } - // Test parsing of aliases - assert_eq!( - joins_from(verified("SELECT * FROM t1 JOIN t2 AS foo USING(c1)")), - vec![join_with_constraint( - "t2", - Some("foo".to_string()), - JoinOperator::Inner - )] - ); - parses_to( - "SELECT * FROM t1 JOIN t2 foo USING(c1)", - "SELECT * FROM t1 JOIN t2 AS foo USING(c1)", - ); - // Test parsing of different join operators + assert_eq!( joins_from(verified("SELECT * FROM t1 JOIN t2 USING(c1)")), - vec![join_with_constraint("t2", None, JoinOperator::Inner)] + vec![join_with_constraint("t2", JoinOperator::Inner)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 LEFT JOIN t2 USING(c1)")), - vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)] + vec![join_with_constraint("t2", JoinOperator::LeftOuter)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 RIGHT JOIN t2 USING(c1)")), - vec![join_with_constraint("t2", None, JoinOperator::RightOuter)] + vec![join_with_constraint("t2", JoinOperator::RightOuter)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 FULL JOIN t2 USING(c1)")), - vec![join_with_constraint("t2", None, JoinOperator::FullOuter)] + vec![join_with_constraint("t2", JoinOperator::FullOuter)] ); } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 11b8cb5f2..6b6598c67 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -13,11 +13,20 @@ fn test_prev_index() { let sql: &str = "SELECT version()"; let mut parser = parser(sql); assert_eq!(parser.prev_token(), None); - assert_eq!(parser.next_token(), Some(Token::make_keyword("SELECT"))); - assert_eq!(parser.next_token(), Some(Token::make_word("version", None))); - assert_eq!(parser.prev_token(), Some(Token::make_word("version", None))); - assert_eq!(parser.peek_token(), Some(Token::make_word("version", None))); - assert_eq!(parser.prev_token(), Some(Token::make_keyword("SELECT"))); + assert_eq!(parser.next_token(), Some(Token::Keyword("SELECT".into()))); + assert_eq!( + parser.next_token(), + Some(Token::Identifier("version".into())) + ); + assert_eq!( + parser.prev_token(), + Some(Token::Identifier("version".into())) + ); + assert_eq!( + parser.peek_token(), + Some(Token::Identifier("version".into())) + ); + assert_eq!(parser.prev_token(), Some(Token::Keyword("SELECT".into()))); assert_eq!(parser.prev_token(), None); } @@ -103,13 +112,6 @@ fn parse_invalid_table_name() { assert!(ast.is_err()); } -#[test] -fn parse_no_table_name() { - let mut parser = parser(""); - let ast = parser.parse_tablename(); - assert!(ast.is_err()); -} - #[test] fn parse_insert_with_columns() { let sql = String::from("INSERT INTO public.customer (id, name, active) VALUES(1, 2, 3)");
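For reference, the dialect-driven word classification that this revert restores (see the `src/sqltokenizer.rs` hunks above) can be sketched in isolation: a word becomes a Keyword token when its uppercased form appears in the dialect's keyword list, and an Identifier token otherwise. A minimal standalone sketch, assuming nothing from the crate (`classify` is an illustrative helper, not part of the library's API):

fn classify(word: &str, keywords: &[&str]) -> String {
    // The reverted tokenizer uppercases the word before the keyword lookup,
    // so keyword matching is case-insensitive while identifiers keep their case.
    let upper = word.to_uppercase();
    if keywords.contains(&upper.as_str()) {
        format!("Keyword({})", upper)
    } else {
        format!("Identifier({})", word)
    }
}

fn main() {
    // A tiny stand-in for a dialect's keywords() list.
    let keywords = ["SELECT", "FROM", "WHERE"];
    assert_eq!(classify("select", &keywords), "Keyword(SELECT)");
    assert_eq!(classify("customer", &keywords), "Identifier(customer)");
}

One consequence of this design, visible in the postgresql.rs and generic_sql.rs hunks above, is that each dialect opts into its own keyword set, so the same word can tokenize as a keyword in one dialect and as a plain identifier in another.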
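The other notable shape change is in parse_infix, which after this revert returns Result<Option<ASTNode>, ParserError> and yields Ok(None) at end of input, letting the caller's loop stop without treating EOF as an error. A minimal sketch of that loop shape, using hypothetical Expr/next_infix stand-ins rather than the crate's types:

#[derive(Debug, Clone)]
enum Expr {
    Num(i64),
    Add(Box<Expr>, Box<Expr>),
}

// Returns None when no tokens remain, mirroring parse_infix returning Ok(None) at EOF.
fn next_infix(tokens: &mut std::slice::Iter<'_, i64>, left: Expr) -> Option<Expr> {
    tokens
        .next()
        .map(|n| Expr::Add(Box::new(left), Box::new(Expr::Num(*n))))
}

fn main() {
    let nums = [2i64, 3];
    let mut it = nums.iter();
    let mut expr = Expr::Num(1);
    // Same pattern as the parser: keep folding while the infix step produces something.
    while let Some(e) = next_infix(&mut it, expr.clone()) {
        expr = e;
    }
    println!("{:?}", expr); // Add(Add(Num(1), Num(2)), Num(3))
}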