From d8173d4196d3a943d62b684053cf0b4d2f8c5654 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Mon, 21 Jan 2019 02:28:39 +0300 Subject: [PATCH 01/45] Extract ASTNode::SQLSelect to a separate struct (1/5) This will allow re-using it for SQLStatement in a later commit. (Also split the new struct into a separate file, other query-related types will be moved here in a follow-up commit.) --- src/sqlast/mod.rs | 77 ++------------------------------------ src/sqlast/query.rs | 70 ++++++++++++++++++++++++++++++++++ src/sqlparser.rs | 4 +- tests/sqlparser_ansi.rs | 2 +- tests/sqlparser_generic.rs | 40 ++++++++++---------- 5 files changed, 97 insertions(+), 96 deletions(-) create mode 100644 src/sqlast/query.rs diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index dbf4b9d48..16f726ee5 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -14,11 +14,13 @@ //! SQL Abstract Syntax Tree (AST) types +mod query; mod sql_operator; mod sqltype; mod table_key; mod value; +pub use self::query::SQLSelect; pub use self::sqltype::SQLType; pub use self::table_key::{AlterOperation, Key, TableKey}; pub use self::value::Value; @@ -78,24 +80,7 @@ pub enum ASTNode { alias: Option, }, /// SELECT - SQLSelect { - /// projection expressions - projection: Vec, - /// FROM - relation: Option>, // TableFactor - // JOIN - joins: Vec, - /// WHERE - selection: Option>, - /// ORDER BY - order_by: Option>, - /// GROUP BY - group_by: Option>, - /// HAVING - having: Option>, - /// LIMIT - limit: Option>, - }, + SQLSelect(SQLSelect), /// INSERT SQLInsert { /// TABLE @@ -203,61 +188,7 @@ impl ToString for ASTNode { relation.to_string() } } - ASTNode::SQLSelect { - projection, - relation, - joins, - selection, - order_by, - group_by, - having, - limit, - } => { - let mut s = format!( - "SELECT {}", - projection - .iter() - .map(|p| p.to_string()) - .collect::>() - .join(", ") - ); - if let Some(relation) = relation { - s += &format!(" FROM {}", relation.as_ref().to_string()); - } - for join in 
joins { - s += &join.to_string(); - } - if let Some(selection) = selection { - s += &format!(" WHERE {}", selection.as_ref().to_string()); - } - if let Some(group_by) = group_by { - s += &format!( - " GROUP BY {}", - group_by - .iter() - .map(|g| g.to_string()) - .collect::>() - .join(", ") - ); - } - if let Some(having) = having { - s += &format!(" HAVING {}", having.as_ref().to_string()); - } - if let Some(order_by) = order_by { - s += &format!( - " ORDER BY {}", - order_by - .iter() - .map(|o| o.to_string()) - .collect::>() - .join(", ") - ); - } - if let Some(limit) = limit { - s += &format!(" LIMIT {}", limit.as_ref().to_string()); - } - s - } + ASTNode::SQLSelect(s) => s.to_string(), ASTNode::SQLInsert { table_name, columns, diff --git a/src/sqlast/query.rs b/src/sqlast/query.rs new file mode 100644 index 000000000..41c3c28e8 --- /dev/null +++ b/src/sqlast/query.rs @@ -0,0 +1,70 @@ +use super::*; + +#[derive(Debug, Clone, PartialEq)] +pub struct SQLSelect { + /// projection expressions + pub projection: Vec, + /// FROM + pub relation: Option>, // TableFactor + // JOIN + pub joins: Vec, + /// WHERE + pub selection: Option>, + /// ORDER BY + pub order_by: Option>, + /// GROUP BY + pub group_by: Option>, + /// HAVING + pub having: Option>, + /// LIMIT + pub limit: Option>, +} + +impl ToString for SQLSelect { + fn to_string(&self) -> String { + let mut s = format!( + "SELECT {}", + self.projection + .iter() + .map(|p| p.to_string()) + .collect::>() + .join(", ") + ); + if let Some(ref relation) = self.relation { + s += &format!(" FROM {}", relation.as_ref().to_string()); + } + for join in &self.joins { + s += &join.to_string(); + } + if let Some(ref selection) = self.selection { + s += &format!(" WHERE {}", selection.as_ref().to_string()); + } + if let Some(ref group_by) = self.group_by { + s += &format!( + " GROUP BY {}", + group_by + .iter() + .map(|g| g.to_string()) + .collect::>() + .join(", ") + ); + } + if let Some(ref having) = self.having { + s += 
&format!(" HAVING {}", having.as_ref().to_string()); + } + if let Some(ref order_by) = self.order_by { + s += &format!( + " ORDER BY {}", + order_by + .iter() + .map(|o| o.to_string()) + .collect::>() + .join(", ") + ); + } + if let Some(ref limit) = self.limit { + s += &format!(" LIMIT {}", limit.as_ref().to_string()); + } + s + } +} diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 33b950de5..b9a9ed908 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -1142,7 +1142,7 @@ impl Parser { next_token )) } else { - Ok(ASTNode::SQLSelect { + Ok(ASTNode::SQLSelect(SQLSelect { projection, selection, relation, @@ -1151,7 +1151,7 @@ impl Parser { order_by, group_by, having, - }) + })) } } diff --git a/tests/sqlparser_ansi.rs b/tests/sqlparser_ansi.rs index 4fec4f49e..a54eaf814 100644 --- a/tests/sqlparser_ansi.rs +++ b/tests/sqlparser_ansi.rs @@ -11,7 +11,7 @@ fn parse_simple_select() { let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1"); let ast = parse_sql(&sql); match ast { - ASTNode::SQLSelect { projection, .. } => { + ASTNode::SQLSelect(SQLSelect { projection, .. }) => { assert_eq!(3, projection.len()); } _ => assert!(false), diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index c57e1d53c..8d08605bf 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -62,9 +62,9 @@ fn parse_where_delete_statement() { fn parse_simple_select() { let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT 5"); match verified(&sql) { - ASTNode::SQLSelect { + ASTNode::SQLSelect(SQLSelect { projection, limit, .. - } => { + }) => { assert_eq!(3, projection.len()); assert_eq!(Some(Box::new(ASTNode::SQLValue(Value::Long(5)))), limit); } @@ -76,7 +76,7 @@ fn parse_simple_select() { fn parse_select_wildcard() { let sql = String::from("SELECT * FROM customer"); match verified(&sql) { - ASTNode::SQLSelect { projection, .. } => { + ASTNode::SQLSelect(SQLSelect { projection, .. 
}) => { assert_eq!(1, projection.len()); assert_eq!(ASTNode::SQLWildcard, projection[0]); } @@ -88,7 +88,7 @@ fn parse_select_wildcard() { fn parse_select_count_wildcard() { let sql = String::from("SELECT COUNT(*) FROM customer"); match verified(&sql) { - ASTNode::SQLSelect { projection, .. } => { + ASTNode::SQLSelect(SQLSelect { projection, .. }) => { assert_eq!(1, projection.len()); assert_eq!( ASTNode::SQLFunction { @@ -191,7 +191,7 @@ fn parse_is_not_null() { fn parse_like() { let sql = String::from("SELECT * FROM customers WHERE name LIKE '%a'"); match verified(&sql) { - ASTNode::SQLSelect { selection, .. } => { + ASTNode::SQLSelect(SQLSelect { selection, .. }) => { assert_eq!( ASTNode::SQLBinaryExpr { left: Box::new(ASTNode::SQLIdentifier("name".to_string())), @@ -211,7 +211,7 @@ fn parse_like() { fn parse_not_like() { let sql = String::from("SELECT * FROM customers WHERE name NOT LIKE '%a'"); match verified(&sql) { - ASTNode::SQLSelect { selection, .. } => { + ASTNode::SQLSelect(SQLSelect { selection, .. }) => { assert_eq!( ASTNode::SQLBinaryExpr { left: Box::new(ASTNode::SQLIdentifier("name".to_string())), @@ -231,7 +231,7 @@ fn parse_not_like() { fn parse_select_order_by() { fn chk(sql: &str) { match verified(&sql) { - ASTNode::SQLSelect { order_by, .. } => { + ASTNode::SQLSelect(SQLSelect { order_by, .. }) => { assert_eq!( Some(vec![ SQLOrderByExpr { @@ -265,9 +265,9 @@ fn parse_select_order_by_limit() { ); let ast = parse_sql(&sql); match ast { - ASTNode::SQLSelect { + ASTNode::SQLSelect(SQLSelect { order_by, limit, .. - } => { + }) => { assert_eq!( Some(vec![ SQLOrderByExpr { @@ -291,7 +291,7 @@ fn parse_select_order_by_limit() { fn parse_select_group_by() { let sql = String::from("SELECT id, fname, lname FROM customer GROUP BY lname, fname"); match verified(&sql) { - ASTNode::SQLSelect { group_by, .. } => { + ASTNode::SQLSelect(SQLSelect { group_by, .. 
}) => { assert_eq!( Some(vec![ ASTNode::SQLIdentifier("lname".to_string()), @@ -316,7 +316,7 @@ fn parse_limit_accepts_all() { fn parse_cast() { let sql = String::from("SELECT CAST(id AS bigint) FROM customer"); match verified(&sql) { - ASTNode::SQLSelect { projection, .. } => { + ASTNode::SQLSelect(SQLSelect { projection, .. }) => { assert_eq!(1, projection.len()); assert_eq!( ASTNode::SQLCast { @@ -377,7 +377,7 @@ fn parse_create_table() { fn parse_scalar_function_in_projection() { let sql = String::from("SELECT sqrt(id) FROM foo"); match verified(&sql) { - ASTNode::SQLSelect { projection, .. } => { + ASTNode::SQLSelect(SQLSelect { projection, .. }) => { assert_eq!( vec![ASTNode::SQLFunction { id: String::from("sqrt"), @@ -401,7 +401,7 @@ fn parse_aggregate_with_group_by() { fn parse_literal_string() { let sql = "SELECT 'one'"; match verified(&sql) { - ASTNode::SQLSelect { ref projection, .. } => { + ASTNode::SQLSelect(SQLSelect { ref projection, .. }) => { assert_eq!( projection[0], ASTNode::SQLValue(Value::SingleQuotedString("one".to_string())) @@ -427,7 +427,7 @@ fn parse_simple_math_expr_minus() { fn parse_select_version() { let sql = "SELECT @@version"; match verified(&sql) { - ASTNode::SQLSelect { ref projection, .. } => { + ASTNode::SQLSelect(SQLSelect { ref projection, .. }) => { assert_eq!( projection[0], ASTNode::SQLIdentifier("@@version".to_string()) @@ -466,10 +466,10 @@ fn parse_case_expression() { let sql = "SELECT CASE WHEN bar IS NULL THEN 'null' WHEN bar = 0 THEN '=0' WHEN bar >= 0 THEN '>=0' ELSE '<0' END FROM foo"; let ast = parse_sql(&sql); assert_eq!(sql, ast.to_string()); - use self::ASTNode::*; + use self::ASTNode::{SQLBinaryExpr, SQLCase, SQLIdentifier, SQLIsNull, SQLValue}; use self::SQLOperator::*; match ast { - ASTNode::SQLSelect { projection, .. } => { + ASTNode::SQLSelect(SQLSelect { projection, .. 
}) => { assert_eq!(1, projection.len()); assert_eq!( SQLCase { @@ -507,7 +507,7 @@ fn parse_select_with_semi_colon() { let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1;"); let ast = parse_sql(&sql); match ast { - ASTNode::SQLSelect { projection, .. } => { + ASTNode::SQLSelect(SQLSelect { projection, .. }) => { assert_eq!(3, projection.len()); } _ => assert!(false), @@ -536,7 +536,7 @@ fn parse_implicit_join() { let sql = "SELECT * FROM t1, t2"; match verified(sql) { - ASTNode::SQLSelect { joins, .. } => { + ASTNode::SQLSelect(SQLSelect { joins, .. }) => { assert_eq!(joins.len(), 1); assert_eq!( joins[0], @@ -558,7 +558,7 @@ fn parse_cross_join() { let sql = "SELECT * FROM t1 CROSS JOIN t2"; match verified(sql) { - ASTNode::SQLSelect { joins, .. } => { + ASTNode::SQLSelect(SQLSelect { joins, .. }) => { assert_eq!(joins.len(), 1); assert_eq!( joins[0], @@ -711,7 +711,7 @@ fn parses_to(from: &str, to: &str) { fn joins_from(ast: ASTNode) -> Vec { match ast { - ASTNode::SQLSelect { joins, .. } => joins, + ASTNode::SQLSelect(SQLSelect { joins, .. 
}) => joins, _ => panic!("Expected SELECT"), } } From 45a5f844afee8eca26c8ce51aa57e1a4749dccb6 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Mon, 21 Jan 2019 02:33:16 +0300 Subject: [PATCH 02/45] Move SQLOrderByExpr and Join* to query.rs (2/5) --- src/sqlast/mod.rs | 94 +-------------------------------------------- src/sqlast/query.rs | 92 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 93 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 16f726ee5..2a3d96961 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -20,7 +20,7 @@ mod sqltype; mod table_key; mod value; -pub use self::query::SQLSelect; +pub use self::query::{Join, JoinConstraint, JoinOperator, SQLOrderByExpr, SQLSelect}; pub use self::sqltype::SQLType; pub use self::table_key::{AlterOperation, Key, TableKey}; pub use self::value::Value; @@ -308,29 +308,6 @@ impl ToString for SQLAssignment { } } -/// SQL ORDER BY expression -#[derive(Debug, Clone, PartialEq)] -pub struct SQLOrderByExpr { - pub expr: Box, - pub asc: Option, -} - -impl SQLOrderByExpr { - pub fn new(expr: Box, asc: Option) -> Self { - SQLOrderByExpr { expr, asc } - } -} - -impl ToString for SQLOrderByExpr { - fn to_string(&self) -> String { - match self.asc { - Some(true) => format!("{} ASC", self.expr.to_string()), - Some(false) => format!("{} DESC", self.expr.to_string()), - None => self.expr.to_string(), - } - } -} - /// SQL column definition #[derive(Debug, Clone, PartialEq)] pub struct SQLColumnDef { @@ -360,72 +337,3 @@ impl ToString for SQLColumnDef { s } } - -#[derive(Debug, Clone, PartialEq)] -pub struct Join { - pub relation: ASTNode, // TableFactor - pub join_operator: JoinOperator, -} - -impl ToString for Join { - fn to_string(&self) -> String { - fn prefix(constraint: &JoinConstraint) -> String { - match constraint { - JoinConstraint::Natural => "NATURAL ".to_string(), - _ => "".to_string(), - } - } - fn suffix(constraint: &JoinConstraint) -> String { - match 
constraint { - JoinConstraint::On(expr) => format!("ON {}", expr.to_string()), - JoinConstraint::Using(attrs) => format!("USING({})", attrs.join(", ")), - _ => "".to_string(), - } - } - match &self.join_operator { - JoinOperator::Inner(constraint) => format!( - " {}JOIN {} {}", - prefix(constraint), - self.relation.to_string(), - suffix(constraint) - ), - JoinOperator::Cross => format!(" CROSS JOIN {}", self.relation.to_string()), - JoinOperator::Implicit => format!(", {}", self.relation.to_string()), - JoinOperator::LeftOuter(constraint) => format!( - " {}LEFT JOIN {} {}", - prefix(constraint), - self.relation.to_string(), - suffix(constraint) - ), - JoinOperator::RightOuter(constraint) => format!( - " {}RIGHT JOIN {} {}", - prefix(constraint), - self.relation.to_string(), - suffix(constraint) - ), - JoinOperator::FullOuter(constraint) => format!( - " {}FULL JOIN {} {}", - prefix(constraint), - self.relation.to_string(), - suffix(constraint) - ), - } - } -} - -#[derive(Debug, Clone, PartialEq)] -pub enum JoinOperator { - Inner(JoinConstraint), - LeftOuter(JoinConstraint), - RightOuter(JoinConstraint), - FullOuter(JoinConstraint), - Implicit, - Cross, -} - -#[derive(Debug, Clone, PartialEq)] -pub enum JoinConstraint { - On(ASTNode), - Using(Vec), - Natural, -} diff --git a/src/sqlast/query.rs b/src/sqlast/query.rs index 41c3c28e8..81cc66c00 100644 --- a/src/sqlast/query.rs +++ b/src/sqlast/query.rs @@ -68,3 +68,95 @@ impl ToString for SQLSelect { s } } + +#[derive(Debug, Clone, PartialEq)] +pub struct Join { + pub relation: ASTNode, // TableFactor + pub join_operator: JoinOperator, +} + +impl ToString for Join { + fn to_string(&self) -> String { + fn prefix(constraint: &JoinConstraint) -> String { + match constraint { + JoinConstraint::Natural => "NATURAL ".to_string(), + _ => "".to_string(), + } + } + fn suffix(constraint: &JoinConstraint) -> String { + match constraint { + JoinConstraint::On(expr) => format!("ON {}", expr.to_string()), + 
JoinConstraint::Using(attrs) => format!("USING({})", attrs.join(", ")), + _ => "".to_string(), + } + } + match &self.join_operator { + JoinOperator::Inner(constraint) => format!( + " {}JOIN {} {}", + prefix(constraint), + self.relation.to_string(), + suffix(constraint) + ), + JoinOperator::Cross => format!(" CROSS JOIN {}", self.relation.to_string()), + JoinOperator::Implicit => format!(", {}", self.relation.to_string()), + JoinOperator::LeftOuter(constraint) => format!( + " {}LEFT JOIN {} {}", + prefix(constraint), + self.relation.to_string(), + suffix(constraint) + ), + JoinOperator::RightOuter(constraint) => format!( + " {}RIGHT JOIN {} {}", + prefix(constraint), + self.relation.to_string(), + suffix(constraint) + ), + JoinOperator::FullOuter(constraint) => format!( + " {}FULL JOIN {} {}", + prefix(constraint), + self.relation.to_string(), + suffix(constraint) + ), + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum JoinOperator { + Inner(JoinConstraint), + LeftOuter(JoinConstraint), + RightOuter(JoinConstraint), + FullOuter(JoinConstraint), + Implicit, + Cross, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum JoinConstraint { + On(ASTNode), + Using(Vec), + Natural, +} + +/// SQL ORDER BY expression +#[derive(Debug, Clone, PartialEq)] +pub struct SQLOrderByExpr { + pub expr: Box, + pub asc: Option, +} + +impl SQLOrderByExpr { + pub fn new(expr: Box, asc: Option) -> Self { + SQLOrderByExpr { expr, asc } + } +} + +impl ToString for SQLOrderByExpr { + fn to_string(&self) -> String { + match self.asc { + Some(true) => format!("{} ASC", self.expr.to_string()), + Some(false) => format!("{} DESC", self.expr.to_string()), + None => self.expr.to_string(), + } + } +} From 7b86f5c842e612590ab25633c60b1cea0b9f8d3f Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Tue, 29 Jan 2019 15:00:50 +0300 Subject: [PATCH 03/45] Remove unused ASTNode::SQLAssignment variant (3/5) The SQLAssignment *struct* is used directly in ASTNode::SQLUpdate (will change to 
SQLStatement::SQLUpdate shortly). --- src/sqlast/mod.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 2a3d96961..93ab56d63 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -39,8 +39,6 @@ pub enum ASTNode { SQLWildcard, /// Multi part identifier e.g. `myschema.dbo.mytable` SQLCompoundIdentifier(Vec), - /// Assigment e.g. `name = 'Fred'` in an UPDATE statement - SQLAssignment(SQLAssignment), /// `IS NULL` expression SQLIsNull(Box), /// `IS NOT NULL` expression @@ -135,7 +133,6 @@ impl ToString for ASTNode { ASTNode::SQLIdentifier(s) => s.to_string(), ASTNode::SQLWildcard => "*".to_string(), ASTNode::SQLCompoundIdentifier(s) => s.join("."), - ASTNode::SQLAssignment(ass) => ass.to_string(), ASTNode::SQLIsNull(ast) => format!("{} IS NULL", ast.as_ref().to_string()), ASTNode::SQLIsNotNull(ast) => format!("{} IS NOT NULL", ast.as_ref().to_string()), ASTNode::SQLBinaryExpr { left, op, right } => format!( @@ -295,7 +292,6 @@ impl ToString for ASTNode { } /// SQL assignment `foo = expr` as used in SQLUpdate -/// TODO: unify this with the ASTNode SQLAssignment #[derive(Debug, Clone, PartialEq)] pub struct SQLAssignment { id: String, From 2dec65fdb45f91d45b27f5a42b9e59daa698d43e Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 31 Jan 2019 04:56:20 +0300 Subject: [PATCH 04/45] Separate statement from expr parsing (4/5) Continuing from https://github.com/andygrove/sqlparser-rs/pull/33#issuecomment-453060427 This stops the parser from accepting (and the AST from being able to represent) SQL look-alike code that makes no sense, e.g. SELECT ... FROM (CREATE TABLE ...) foo SELECT ... FROM (1+CAST(...)) foo Generally this makes the AST less "partially typed": meaning certain parts are strongly typed (e.g. SELECT can only contain projections, relations, etc.), while everything that didn't get its own type is dumped into ASTNode, effectively untyped. 
After a few more fixes (yet to be implemented), `ASTNode` could become an `SQLExpression`. The Pratt-style expression parser (returning an SQLExpression) would be invoked from the top-down parser in places where a generic expression is expected (e.g. after SELECT <...>, WHERE <...>, etc.), while things like select's `projection` and `relation` could be more appropriately (narrowly) typed. Since the diff is quite large due to the necessarily large number of mechanical changes, here's an overview: 1) Interface changes: - A new AST enum - `SQLStatement` - is split out of ASTNode: - The variants of the ASTNode enum, which _only_ make sense as a top-level statement (INSERT, UPDATE, DELETE, CREATE, ALTER, COPY) are _moved_ to the new enum, with no other changes. - SQLSelect is _duplicated_: now available both as a variant in SQLStatement::SQLSelect (top-level SELECT) and ASTNode::SQLSelect (subquery). - The main entry point (Parser::parse_sql) now expects an SQL statement as input, and returns an `SQLStatement`. 2) Parser changes: instead of detecting the top-level constructs deep down in the precedence parser (`parse_prefix`) we are able to do it right after setting up the parser in the `parse_sql` entry point (SELECT, again, is kept in the expression parser to demonstrate how subqueries could be implemented). The rest of the parser changes are mechanical ASTNode -> SQLStatement replacements resulting from the AST change. 3) Testing changes: for every test - depending on whether the input was a complete statement or an expression - I used an appropriate helper function: - `verified` (parses SQL, checks that it round-trips, and returns the AST) - was replaced by `verified_stmt` or `verified_expr`. - `parse_sql` (which returned the AST without checking it round-tripped) was replaced by: - `parse_sql_expr` (same function, for expressions) - `one_statement_parses_to` (formerly `parses_to`), extended to deal with statements that are not expected to round-trip. 
The weird name is to reduce further churn when implementing multi-statement parsing. - `verified_stmt` (in 4 testcases that actually round-tripped) --- src/sqlast/mod.rs | 119 +++++++++++++---------- src/sqlparser.rs | 62 ++++++++---- tests/sqlparser_ansi.rs | 4 +- tests/sqlparser_generic.rs | 184 +++++++++++++++++++----------------- tests/sqlparser_postgres.rs | 69 ++++++++------ 5 files changed, 245 insertions(+), 193 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 93ab56d63..811697ebf 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -79,52 +79,6 @@ pub enum ASTNode { }, /// SELECT SQLSelect(SQLSelect), - /// INSERT - SQLInsert { - /// TABLE - table_name: String, - /// COLUMNS - columns: Vec, - /// VALUES (vector of rows to insert) - values: Vec>, - }, - SQLCopy { - /// TABLE - table_name: String, - /// COLUMNS - columns: Vec, - /// VALUES a vector of values to be copied - values: Vec>, - }, - /// UPDATE - SQLUpdate { - /// TABLE - table_name: String, - /// Column assignments - assignments: Vec, - /// WHERE - selection: Option>, - }, - /// DELETE - SQLDelete { - /// FROM - relation: Option>, - /// WHERE - selection: Option>, - }, - /// CREATE TABLE - SQLCreateTable { - /// Table name - name: String, - /// Optional schema - columns: Vec, - }, - /// ALTER TABLE - SQLAlterTable { - /// Table name - name: String, - operation: AlterOperation, - }, } impl ToString for ASTNode { @@ -186,7 +140,68 @@ impl ToString for ASTNode { } } ASTNode::SQLSelect(s) => s.to_string(), - ASTNode::SQLInsert { + } + } +} + +/// A top-level statement (SELECT, INSERT, CREATE, etc.) 
+#[derive(Debug, Clone, PartialEq)] +pub enum SQLStatement { + /// SELECT + SQLSelect(SQLSelect), + /// INSERT + SQLInsert { + /// TABLE + table_name: String, + /// COLUMNS + columns: Vec, + /// VALUES (vector of rows to insert) + values: Vec>, + }, + SQLCopy { + /// TABLE + table_name: String, + /// COLUMNS + columns: Vec, + /// VALUES a vector of values to be copied + values: Vec>, + }, + /// UPDATE + SQLUpdate { + /// TABLE + table_name: String, + /// Column assignments + assignments: Vec, + /// WHERE + selection: Option>, + }, + /// DELETE + SQLDelete { + /// FROM + relation: Option>, + /// WHERE + selection: Option>, + }, + /// CREATE TABLE + SQLCreateTable { + /// Table name + name: String, + /// Optional schema + columns: Vec, + }, + /// ALTER TABLE + SQLAlterTable { + /// Table name + name: String, + operation: AlterOperation, + }, +} + +impl ToString for SQLStatement { + fn to_string(&self) -> String { + match self { + SQLStatement::SQLSelect(s) => s.to_string(), + SQLStatement::SQLInsert { table_name, columns, values, @@ -211,7 +226,7 @@ impl ToString for ASTNode { } s } - ASTNode::SQLCopy { + SQLStatement::SQLCopy { table_name, columns, values, @@ -241,7 +256,7 @@ impl ToString for ASTNode { s += "\n\\."; s } - ASTNode::SQLUpdate { + SQLStatement::SQLUpdate { table_name, assignments, selection, @@ -262,7 +277,7 @@ impl ToString for ASTNode { } s } - ASTNode::SQLDelete { + SQLStatement::SQLDelete { relation, selection, } => { @@ -275,7 +290,7 @@ impl ToString for ASTNode { } s } - ASTNode::SQLCreateTable { name, columns } => format!( + SQLStatement::SQLCreateTable { name, columns } => format!( "CREATE TABLE {} ({})", name, columns @@ -284,7 +299,7 @@ impl ToString for ASTNode { .collect::>() .join(", ") ), - ASTNode::SQLAlterTable { name, operation } => { + SQLStatement::SQLAlterTable { name, operation } => { format!("ALTER TABLE {} {}", name, operation.to_string()) } } diff --git a/src/sqlparser.rs b/src/sqlparser.rs index b9a9ed908..d153c2657 100644 --- 
a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -54,11 +54,36 @@ impl Parser { } /// Parse a SQL statement and produce an Abstract Syntax Tree (AST) - pub fn parse_sql(dialect: &Dialect, sql: String) -> Result { + pub fn parse_sql(dialect: &Dialect, sql: String) -> Result { let mut tokenizer = Tokenizer::new(dialect, &sql); let tokens = tokenizer.tokenize()?; let mut parser = Parser::new(tokens); - parser.parse() + parser.parse_statement() + } + + /// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.) + pub fn parse_statement(&mut self) -> Result { + match self.next_token() { + Some(t) => match t { + Token::SQLWord(ref w) if w.keyword != "" => match w.keyword.as_ref() { + "SELECT" => Ok(SQLStatement::SQLSelect(self.parse_select()?)), + "CREATE" => Ok(self.parse_create()?), + "DELETE" => Ok(self.parse_delete()?), + "INSERT" => Ok(self.parse_insert()?), + "ALTER" => Ok(self.parse_alter()?), + "COPY" => Ok(self.parse_copy()?), + _ => parser_err!(format!( + "Unexpected keyword {:?} at the beginning of a statement", + w.to_string() + )), + }, + unexpected => parser_err!(format!( + "Unexpected {:?} at the beginning of a statement", + unexpected + )), + }, + _ => parser_err!("Unexpected end of file"), + } } /// Parse a new expression @@ -111,12 +136,7 @@ impl Parser { match self.next_token() { Some(t) => match t { Token::SQLWord(w) => match w.keyword.as_ref() { - "SELECT" => Ok(self.parse_select()?), - "CREATE" => Ok(self.parse_create()?), - "DELETE" => Ok(self.parse_delete()?), - "INSERT" => Ok(self.parse_insert()?), - "ALTER" => Ok(self.parse_alter()?), - "COPY" => Ok(self.parse_copy()?), + "SELECT" => Ok(ASTNode::SQLSelect(self.parse_select()?)), "TRUE" | "FALSE" | "NULL" => { self.prev_token(); self.parse_sql_value() @@ -495,7 +515,7 @@ impl Parser { } /// Parse a SQL CREATE statement - pub fn parse_create(&mut self) -> Result { + pub fn parse_create(&mut self) -> Result { if self.parse_keywords(vec!["TABLE"]) { let table_name = 
self.parse_tablename()?; // parse optional column list (schema) @@ -562,7 +582,7 @@ impl Parser { } } } - Ok(ASTNode::SQLCreateTable { + Ok(SQLStatement::SQLCreateTable { name: table_name, columns, }) @@ -608,7 +628,7 @@ impl Parser { } } - pub fn parse_alter(&mut self) -> Result { + pub fn parse_alter(&mut self) -> Result { self.expect_keyword("TABLE")?; let _ = self.parse_keyword("ONLY"); let table_name = self.parse_tablename()?; @@ -632,14 +652,14 @@ impl Parser { self.peek_token() )); }; - Ok(ASTNode::SQLAlterTable { + Ok(SQLStatement::SQLAlterTable { name: table_name, operation: operation?, }) } /// Parse a copy statement - pub fn parse_copy(&mut self) -> Result { + pub fn parse_copy(&mut self) -> Result { let table_name = self.parse_tablename()?; let columns = if self.consume_token(&Token::LParen) { let column_names = self.parse_column_names()?; @@ -652,7 +672,7 @@ impl Parser { self.expect_keyword("STDIN")?; self.expect_token(&Token::SemiColon)?; let values = self.parse_tsv()?; - Ok(ASTNode::SQLCopy { + Ok(SQLStatement::SQLCopy { table_name, columns, values, @@ -1062,7 +1082,7 @@ impl Parser { } } - pub fn parse_delete(&mut self) -> Result { + pub fn parse_delete(&mut self) -> Result { let relation: Option> = if self.parse_keyword("FROM") { Some(Box::new(self.parse_expr(0)?)) } else { @@ -1084,7 +1104,7 @@ impl Parser { next_token )) } else { - Ok(ASTNode::SQLDelete { + Ok(SQLStatement::SQLDelete { relation, selection, }) @@ -1092,7 +1112,7 @@ impl Parser { } /// Parse a SELECT statement - pub fn parse_select(&mut self) -> Result { + pub fn parse_select(&mut self) -> Result { let projection = self.parse_expr_list()?; let (relation, joins): (Option>, Vec) = if self.parse_keyword("FROM") { @@ -1142,7 +1162,7 @@ impl Parser { next_token )) } else { - Ok(ASTNode::SQLSelect(SQLSelect { + Ok(SQLSelect { projection, selection, relation, @@ -1151,7 +1171,7 @@ impl Parser { order_by, group_by, having, - })) + }) } } @@ -1290,7 +1310,7 @@ impl Parser { } /// Parse an 
INSERT statement - pub fn parse_insert(&mut self) -> Result { + pub fn parse_insert(&mut self) -> Result { self.expect_keyword("INTO")?; let table_name = self.parse_tablename()?; let columns = if self.consume_token(&Token::LParen) { @@ -1304,7 +1324,7 @@ impl Parser { self.expect_token(&Token::LParen)?; let values = self.parse_expr_list()?; self.expect_token(&Token::RParen)?; - Ok(ASTNode::SQLInsert { + Ok(SQLStatement::SQLInsert { table_name, columns, values: vec![values], diff --git a/tests/sqlparser_ansi.rs b/tests/sqlparser_ansi.rs index a54eaf814..b22482ae1 100644 --- a/tests/sqlparser_ansi.rs +++ b/tests/sqlparser_ansi.rs @@ -9,7 +9,7 @@ use sqlparser::sqltokenizer::*; #[test] fn parse_simple_select() { let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1"); - let ast = parse_sql(&sql); + let ast = parse_sql_expr(&sql); match ast { ASTNode::SQLSelect(SQLSelect { projection, .. }) => { assert_eq!(3, projection.len()); @@ -18,7 +18,7 @@ fn parse_simple_select() { } } -fn parse_sql(sql: &str) -> ASTNode { +fn parse_sql_expr(sql: &str) -> ASTNode { let dialect = AnsiSqlDialect {}; let mut tokenizer = Tokenizer::new(&dialect, &sql); let tokens = tokenizer.tokenize().unwrap(); diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 8d08605bf..455c5a609 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -10,8 +10,8 @@ use sqlparser::sqltokenizer::*; fn parse_delete_statement() { let sql: &str = "DELETE FROM 'table'"; - match verified(&sql) { - ASTNode::SQLDelete { relation, .. } => { + match verified_stmt(&sql) { + SQLStatement::SQLDelete { relation, .. } => { assert_eq!( Some(Box::new(ASTNode::SQLValue(Value::SingleQuotedString( "table".to_string() @@ -31,8 +31,8 @@ fn parse_where_delete_statement() { use self::ASTNode::*; use self::SQLOperator::*; - match verified(&sql) { - ASTNode::SQLDelete { + match verified_stmt(&sql) { + SQLStatement::SQLDelete { relation, selection, .. 
@@ -61,8 +61,8 @@ fn parse_where_delete_statement() { #[test] fn parse_simple_select() { let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT 5"); - match verified(&sql) { - ASTNode::SQLSelect(SQLSelect { + match verified_stmt(&sql) { + SQLStatement::SQLSelect(SQLSelect { projection, limit, .. }) => { assert_eq!(3, projection.len()); @@ -75,8 +75,8 @@ fn parse_simple_select() { #[test] fn parse_select_wildcard() { let sql = String::from("SELECT * FROM customer"); - match verified(&sql) { - ASTNode::SQLSelect(SQLSelect { projection, .. }) => { + match verified_stmt(&sql) { + SQLStatement::SQLSelect(SQLSelect { projection, .. }) => { assert_eq!(1, projection.len()); assert_eq!(ASTNode::SQLWildcard, projection[0]); } @@ -87,8 +87,8 @@ fn parse_select_wildcard() { #[test] fn parse_select_count_wildcard() { let sql = String::from("SELECT COUNT(*) FROM customer"); - match verified(&sql) { - ASTNode::SQLSelect(SQLSelect { projection, .. }) => { + match verified_stmt(&sql) { + SQLStatement::SQLSelect(SQLSelect { projection, .. 
}) => { assert_eq!(1, projection.len()); assert_eq!( ASTNode::SQLFunction { @@ -108,7 +108,7 @@ fn parse_not() { "SELECT id FROM customer \ WHERE NOT salary = ''", ); - let _ast = verified(&sql); + let _ast = verified_stmt(&sql); //TODO: add assertions } @@ -118,14 +118,14 @@ fn parse_select_string_predicate() { "SELECT id, fname, lname FROM customer \ WHERE salary != 'Not Provided' AND salary != ''", ); - let _ast = verified(&sql); + let _ast = verified_stmt(&sql); //TODO: add assertions } #[test] fn parse_projection_nested_type() { let sql = String::from("SELECT customer.address.state FROM foo"); - let _ast = verified(&sql); + let _ast = verified_stmt(&sql); //TODO: add assertions } @@ -144,7 +144,7 @@ fn parse_compound_expr_1() { right: Box::new(SQLIdentifier("c".to_string())) }) }, - verified(&sql) + verified_expr(&sql) ); } @@ -163,7 +163,7 @@ fn parse_compound_expr_2() { op: Plus, right: Box::new(SQLIdentifier("c".to_string())) }, - verified(&sql) + verified_expr(&sql) ); } @@ -173,7 +173,7 @@ fn parse_is_null() { let sql = String::from("a IS NULL"); assert_eq!( SQLIsNull(Box::new(SQLIdentifier("a".to_string()))), - verified(&sql) + verified_expr(&sql) ); } @@ -183,15 +183,15 @@ fn parse_is_not_null() { let sql = String::from("a IS NOT NULL"); assert_eq!( SQLIsNotNull(Box::new(SQLIdentifier("a".to_string()))), - verified(&sql) + verified_expr(&sql) ); } #[test] fn parse_like() { let sql = String::from("SELECT * FROM customers WHERE name LIKE '%a'"); - match verified(&sql) { - ASTNode::SQLSelect(SQLSelect { selection, .. }) => { + match verified_stmt(&sql) { + SQLStatement::SQLSelect(SQLSelect { selection, .. }) => { assert_eq!( ASTNode::SQLBinaryExpr { left: Box::new(ASTNode::SQLIdentifier("name".to_string())), @@ -210,8 +210,8 @@ fn parse_like() { #[test] fn parse_not_like() { let sql = String::from("SELECT * FROM customers WHERE name NOT LIKE '%a'"); - match verified(&sql) { - ASTNode::SQLSelect(SQLSelect { selection, .. 
}) => { + match verified_stmt(&sql) { + SQLStatement::SQLSelect(SQLSelect { selection, .. }) => { assert_eq!( ASTNode::SQLBinaryExpr { left: Box::new(ASTNode::SQLIdentifier("name".to_string())), @@ -230,8 +230,8 @@ fn parse_not_like() { #[test] fn parse_select_order_by() { fn chk(sql: &str) { - match verified(&sql) { - ASTNode::SQLSelect(SQLSelect { order_by, .. }) => { + match verified_stmt(&sql) { + SQLStatement::SQLSelect(SQLSelect { order_by, .. }) => { assert_eq!( Some(vec![ SQLOrderByExpr { @@ -263,9 +263,8 @@ fn parse_select_order_by_limit() { let sql = String::from( "SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC LIMIT 2", ); - let ast = parse_sql(&sql); - match ast { - ASTNode::SQLSelect(SQLSelect { + match verified_stmt(&sql) { + SQLStatement::SQLSelect(SQLSelect { order_by, limit, .. }) => { assert_eq!( @@ -290,8 +289,8 @@ fn parse_select_order_by_limit() { #[test] fn parse_select_group_by() { let sql = String::from("SELECT id, fname, lname FROM customer GROUP BY lname, fname"); - match verified(&sql) { - ASTNode::SQLSelect(SQLSelect { group_by, .. }) => { + match verified_stmt(&sql) { + SQLStatement::SQLSelect(SQLSelect { group_by, .. }) => { assert_eq!( Some(vec![ ASTNode::SQLIdentifier("lname".to_string()), @@ -306,7 +305,7 @@ fn parse_select_group_by() { #[test] fn parse_limit_accepts_all() { - parses_to( + one_statement_parses_to( "SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT ALL", "SELECT id, fname, lname FROM customer WHERE id = 1", ); @@ -315,8 +314,8 @@ fn parse_limit_accepts_all() { #[test] fn parse_cast() { let sql = String::from("SELECT CAST(id AS bigint) FROM customer"); - match verified(&sql) { - ASTNode::SQLSelect(SQLSelect { projection, .. }) => { + match verified_stmt(&sql) { + SQLStatement::SQLSelect(SQLSelect { projection, .. 
}) => { assert_eq!(1, projection.len()); assert_eq!( ASTNode::SQLCast { @@ -328,7 +327,7 @@ fn parse_cast() { } _ => assert!(false), } - parses_to( + one_statement_parses_to( "SELECT CAST(id AS BIGINT) FROM customer", "SELECT CAST(id AS bigint) FROM customer", ); @@ -342,15 +341,15 @@ fn parse_create_table() { lat DOUBLE NULL,\ lng DOUBLE NULL)", ); - parses_to( + let ast = one_statement_parses_to( &sql, "CREATE TABLE uk_cities (\ name character varying(100) NOT NULL, \ lat double, \ lng double)", ); - match parse_sql(&sql) { - ASTNode::SQLCreateTable { name, columns } => { + match ast { + SQLStatement::SQLCreateTable { name, columns } => { assert_eq!("uk_cities", name); assert_eq!(3, columns.len()); @@ -376,8 +375,8 @@ fn parse_create_table() { #[test] fn parse_scalar_function_in_projection() { let sql = String::from("SELECT sqrt(id) FROM foo"); - match verified(&sql) { - ASTNode::SQLSelect(SQLSelect { projection, .. }) => { + match verified_stmt(&sql) { + SQLStatement::SQLSelect(SQLSelect { projection, .. }) => { assert_eq!( vec![ASTNode::SQLFunction { id: String::from("sqrt"), @@ -393,15 +392,15 @@ fn parse_scalar_function_in_projection() { #[test] fn parse_aggregate_with_group_by() { let sql = String::from("SELECT a, COUNT(1), MIN(b), MAX(b) FROM foo GROUP BY a"); - let _ast = verified(&sql); + let _ast = verified_stmt(&sql); //TODO: assertions } #[test] fn parse_literal_string() { let sql = "SELECT 'one'"; - match verified(&sql) { - ASTNode::SQLSelect(SQLSelect { ref projection, .. }) => { + match verified_stmt(&sql) { + SQLStatement::SQLSelect(SQLSelect { ref projection, .. 
}) => { assert_eq!( projection[0], ASTNode::SQLValue(Value::SingleQuotedString("one".to_string())) @@ -414,20 +413,20 @@ fn parse_literal_string() { #[test] fn parse_simple_math_expr_plus() { let sql = "SELECT a + b, 2 + a, 2.5 + a, a_f + b_f, 2 + a_f, 2.5 + a_f FROM c"; - parse_sql(&sql); + verified_stmt(&sql); } #[test] fn parse_simple_math_expr_minus() { let sql = "SELECT a - b, 2 - a, 2.5 - a, a_f - b_f, 2 - a_f, 2.5 - a_f FROM c"; - parse_sql(&sql); + verified_stmt(&sql); } #[test] fn parse_select_version() { let sql = "SELECT @@version"; - match verified(&sql) { - ASTNode::SQLSelect(SQLSelect { ref projection, .. }) => { + match verified_stmt(&sql) { + SQLStatement::SQLSelect(SQLSelect { ref projection, .. }) => { assert_eq!( projection[0], ASTNode::SQLIdentifier("@@version".to_string()) @@ -442,7 +441,7 @@ fn parse_parens() { use self::ASTNode::*; use self::SQLOperator::*; let sql = "(a + b) - (c + d)"; - let ast = parse_sql(&sql); + let ast = parse_sql_expr(&sql); assert_eq!( SQLBinaryExpr { left: Box::new(SQLBinaryExpr { @@ -464,12 +463,10 @@ fn parse_parens() { #[test] fn parse_case_expression() { let sql = "SELECT CASE WHEN bar IS NULL THEN 'null' WHEN bar = 0 THEN '=0' WHEN bar >= 0 THEN '>=0' ELSE '<0' END FROM foo"; - let ast = parse_sql(&sql); - assert_eq!(sql, ast.to_string()); use self::ASTNode::{SQLBinaryExpr, SQLCase, SQLIdentifier, SQLIsNull, SQLValue}; use self::SQLOperator::*; - match ast { - ASTNode::SQLSelect(SQLSelect { projection, .. }) => { + match verified_stmt(&sql) { + SQLStatement::SQLSelect(SQLSelect { projection, .. }) => { assert_eq!(1, projection.len()); assert_eq!( SQLCase { @@ -505,9 +502,8 @@ fn parse_case_expression() { #[test] fn parse_select_with_semi_colon() { let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1;"); - let ast = parse_sql(&sql); - match ast { - ASTNode::SQLSelect(SQLSelect { projection, .. 
}) => { + match one_statement_parses_to(&sql, "") { + SQLStatement::SQLSelect(SQLSelect { projection, .. }) => { assert_eq!(3, projection.len()); } _ => assert!(false), @@ -518,8 +514,8 @@ fn parse_select_with_semi_colon() { fn parse_delete_with_semi_colon() { let sql: &str = "DELETE FROM 'table';"; - match parse_sql(&sql) { - ASTNode::SQLDelete { relation, .. } => { + match one_statement_parses_to(&sql, "") { + SQLStatement::SQLDelete { relation, .. } => { assert_eq!( Some(Box::new(ASTNode::SQLValue(Value::SingleQuotedString( "table".to_string() @@ -535,8 +531,8 @@ fn parse_delete_with_semi_colon() { fn parse_implicit_join() { let sql = "SELECT * FROM t1, t2"; - match verified(sql) { - ASTNode::SQLSelect(SQLSelect { joins, .. }) => { + match verified_stmt(sql) { + SQLStatement::SQLSelect(SQLSelect { joins, .. }) => { assert_eq!(joins.len(), 1); assert_eq!( joins[0], @@ -557,8 +553,8 @@ fn parse_implicit_join() { fn parse_cross_join() { let sql = "SELECT * FROM t1 CROSS JOIN t2"; - match verified(sql) { - ASTNode::SQLSelect(SQLSelect { joins, .. }) => { + match verified_stmt(sql) { + SQLStatement::SQLSelect(SQLSelect { joins, .. 
}) => { assert_eq!(joins.len(), 1); assert_eq!( joins[0], @@ -596,32 +592,32 @@ fn parse_joins_on() { } // Test parsing of aliases assert_eq!( - joins_from(verified("SELECT * FROM t1 JOIN t2 AS foo ON c1 = c2")), + joins_from(verified_stmt("SELECT * FROM t1 JOIN t2 AS foo ON c1 = c2")), vec![join_with_constraint( "t2", Some("foo".to_string()), JoinOperator::Inner )] ); - parses_to( + one_statement_parses_to( "SELECT * FROM t1 JOIN t2 foo ON c1 = c2", "SELECT * FROM t1 JOIN t2 AS foo ON c1 = c2", ); // Test parsing of different join operators assert_eq!( - joins_from(verified("SELECT * FROM t1 JOIN t2 ON c1 = c2")), + joins_from(verified_stmt("SELECT * FROM t1 JOIN t2 ON c1 = c2")), vec![join_with_constraint("t2", None, JoinOperator::Inner)] ); assert_eq!( - joins_from(verified("SELECT * FROM t1 LEFT JOIN t2 ON c1 = c2")), + joins_from(verified_stmt("SELECT * FROM t1 LEFT JOIN t2 ON c1 = c2")), vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)] ); assert_eq!( - joins_from(verified("SELECT * FROM t1 RIGHT JOIN t2 ON c1 = c2")), + joins_from(verified_stmt("SELECT * FROM t1 RIGHT JOIN t2 ON c1 = c2")), vec![join_with_constraint("t2", None, JoinOperator::RightOuter)] ); assert_eq!( - joins_from(verified("SELECT * FROM t1 FULL JOIN t2 ON c1 = c2")), + joins_from(verified_stmt("SELECT * FROM t1 FULL JOIN t2 ON c1 = c2")), vec![join_with_constraint("t2", None, JoinOperator::FullOuter)] ); } @@ -643,32 +639,32 @@ fn parse_joins_using() { } // Test parsing of aliases assert_eq!( - joins_from(verified("SELECT * FROM t1 JOIN t2 AS foo USING(c1)")), + joins_from(verified_stmt("SELECT * FROM t1 JOIN t2 AS foo USING(c1)")), vec![join_with_constraint( "t2", Some("foo".to_string()), JoinOperator::Inner )] ); - parses_to( + one_statement_parses_to( "SELECT * FROM t1 JOIN t2 foo USING(c1)", "SELECT * FROM t1 JOIN t2 AS foo USING(c1)", ); // Test parsing of different join operators assert_eq!( - joins_from(verified("SELECT * FROM t1 JOIN t2 USING(c1)")), + 
joins_from(verified_stmt("SELECT * FROM t1 JOIN t2 USING(c1)")), vec![join_with_constraint("t2", None, JoinOperator::Inner)] ); assert_eq!( - joins_from(verified("SELECT * FROM t1 LEFT JOIN t2 USING(c1)")), + joins_from(verified_stmt("SELECT * FROM t1 LEFT JOIN t2 USING(c1)")), vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)] ); assert_eq!( - joins_from(verified("SELECT * FROM t1 RIGHT JOIN t2 USING(c1)")), + joins_from(verified_stmt("SELECT * FROM t1 RIGHT JOIN t2 USING(c1)")), vec![join_with_constraint("t2", None, JoinOperator::RightOuter)] ); assert_eq!( - joins_from(verified("SELECT * FROM t1 FULL JOIN t2 USING(c1)")), + joins_from(verified_stmt("SELECT * FROM t1 FULL JOIN t2 USING(c1)")), vec![join_with_constraint("t2", None, JoinOperator::FullOuter)] ); } @@ -676,54 +672,68 @@ fn parse_joins_using() { #[test] fn parse_complex_join() { let sql = "SELECT c1, c2 FROM t1, t4 JOIN t2 ON t2.c = t1.c LEFT JOIN t3 USING(q, c) WHERE t4.c = t1.c"; - assert_eq!(sql, parse_sql(sql).to_string()); + verified_stmt(sql); } #[test] fn parse_join_syntax_variants() { - parses_to( + one_statement_parses_to( "SELECT c1 FROM t1 INNER JOIN t2 USING(c1)", "SELECT c1 FROM t1 JOIN t2 USING(c1)", ); - parses_to( + one_statement_parses_to( "SELECT c1 FROM t1 LEFT OUTER JOIN t2 USING(c1)", "SELECT c1 FROM t1 LEFT JOIN t2 USING(c1)", ); - parses_to( + one_statement_parses_to( "SELECT c1 FROM t1 RIGHT OUTER JOIN t2 USING(c1)", "SELECT c1 FROM t1 RIGHT JOIN t2 USING(c1)", ); - parses_to( + one_statement_parses_to( "SELECT c1 FROM t1 FULL OUTER JOIN t2 USING(c1)", "SELECT c1 FROM t1 FULL JOIN t2 USING(c1)", ); } -fn verified(query: &str) -> ASTNode { - let ast = parse_sql(query); - assert_eq!(query, &ast.to_string()); - ast +fn verified_stmt(query: &str) -> SQLStatement { + one_statement_parses_to(query, query) } -fn parses_to(from: &str, to: &str) { - assert_eq!(to, &parse_sql(from).to_string()) +fn verified_expr(query: &str) -> ASTNode { + let ast = parse_sql_expr(query); + 
assert_eq!(query, &ast.to_string()); + ast } -fn joins_from(ast: ASTNode) -> Vec { +fn joins_from(ast: SQLStatement) -> Vec { match ast { - ASTNode::SQLSelect(SQLSelect { joins, .. }) => joins, + SQLStatement::SQLSelect(SQLSelect { joins, .. }) => joins, _ => panic!("Expected SELECT"), } } -fn parse_sql(sql: &str) -> ASTNode { - let generic_ast = parse_sql_with(sql, &GenericSqlDialect {}); - let pg_ast = parse_sql_with(sql, &PostgreSqlDialect {}); +/// Ensures that `sql` parses as a statement, optionally checking that +/// converting AST back to string equals to `canonical` (unless an empty string +/// is provided). +fn one_statement_parses_to(sql: &str, canonical: &str) -> SQLStatement { + let generic_ast = Parser::parse_sql(&GenericSqlDialect {}, sql.to_string()).unwrap(); + let pg_ast = Parser::parse_sql(&PostgreSqlDialect {}, sql.to_string()).unwrap(); + assert_eq!(generic_ast, pg_ast); + + if !canonical.is_empty() { + assert_eq!(canonical, generic_ast.to_string()) + } + generic_ast +} + +fn parse_sql_expr(sql: &str) -> ASTNode { + let generic_ast = parse_sql_expr_with(&GenericSqlDialect {}, &sql.to_string()); + let pg_ast = parse_sql_expr_with(&PostgreSqlDialect {}, &sql.to_string()); assert_eq!(generic_ast, pg_ast); generic_ast } -fn parse_sql_with(sql: &str, dialect: &Dialect) -> ASTNode { +fn parse_sql_expr_with(dialect: &Dialect, sql: &str) -> ASTNode { let mut tokenizer = Tokenizer::new(dialect, &sql); let tokens = tokenizer.tokenize().unwrap(); let mut parser = Parser::new(tokens); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 11b8cb5f2..37215a9bc 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -24,8 +24,8 @@ fn test_prev_index() { #[test] fn parse_simple_insert() { let sql = String::from("INSERT INTO customer VALUES(1, 2, 3)"); - match verified(&sql) { - ASTNode::SQLInsert { + match verified_stmt(&sql) { + SQLStatement::SQLInsert { table_name, columns, values, @@ -49,8 +49,8 @@ fn 
parse_simple_insert() { #[test] fn parse_common_insert() { let sql = String::from("INSERT INTO public.customer VALUES(1, 2, 3)"); - match verified(&sql) { - ASTNode::SQLInsert { + match verified_stmt(&sql) { + SQLStatement::SQLInsert { table_name, columns, values, @@ -74,8 +74,8 @@ fn parse_common_insert() { #[test] fn parse_complex_insert() { let sql = String::from("INSERT INTO db.public.customer VALUES(1, 2, 3)"); - match verified(&sql) { - ASTNode::SQLInsert { + match verified_stmt(&sql) { + SQLStatement::SQLInsert { table_name, columns, values, @@ -113,8 +113,8 @@ fn parse_no_table_name() { #[test] fn parse_insert_with_columns() { let sql = String::from("INSERT INTO public.customer (id, name, active) VALUES(1, 2, 3)"); - match verified(&sql) { - ASTNode::SQLInsert { + match verified_stmt(&sql) { + SQLStatement::SQLInsert { table_name, columns, values, @@ -141,8 +141,7 @@ fn parse_insert_with_columns() { #[test] fn parse_insert_invalid() { let sql = String::from("INSERT public.customer (id, name, active) VALUES (1, 2, 3)"); - let mut parser = parser(&sql); - match parser.parse() { + match Parser::parse_sql(&PostgreSqlDialect {}, sql) { Err(_) => {} _ => assert!(false), } @@ -163,8 +162,8 @@ fn parse_create_table_with_defaults() { last_update timestamp without time zone DEFAULT now() NOT NULL, active integer NOT NULL)", ); - match parse_sql(&sql) { - ASTNode::SQLCreateTable { name, columns } => { + match one_statement_parses_to(&sql, "") { + SQLStatement::SQLCreateTable { name, columns } => { assert_eq!("public.customer", name); assert_eq!(10, columns.len()); @@ -204,9 +203,8 @@ fn parse_create_table_from_pg_dump() { release_year public.year, active integer )"); - let ast = parse_sql(&sql); - match ast { - ASTNode::SQLCreateTable { name, columns } => { + match one_statement_parses_to(&sql, "") { + SQLStatement::SQLCreateTable { name, columns } => { assert_eq!("public.customer", name); let c_customer_id = &columns[0]; @@ -259,8 +257,8 @@ fn 
parse_create_table_with_inherit() { use_metric boolean DEFAULT true\ )", ); - match verified(&sql) { - ASTNode::SQLCreateTable { name, columns } => { + match verified_stmt(&sql) { + SQLStatement::SQLCreateTable { name, columns } => { assert_eq!("bazaar.settings", name); let c_name = &columns[0]; @@ -288,8 +286,8 @@ fn parse_alter_table_constraint_primary_key() { ALTER TABLE bazaar.address \ ADD CONSTRAINT address_pkey PRIMARY KEY (address_id)", ); - match verified(&sql) { - ASTNode::SQLAlterTable { name, .. } => { + match verified_stmt(&sql) { + SQLStatement::SQLAlterTable { name, .. } => { assert_eq!(name, "bazaar.address"); } _ => assert!(false), @@ -301,8 +299,8 @@ fn parse_alter_table_constraint_foreign_key() { let sql = String::from("\ ALTER TABLE public.customer \ ADD CONSTRAINT customer_address_id_fkey FOREIGN KEY (address_id) REFERENCES public.address(address_id)"); - match verified(&sql) { - ASTNode::SQLAlterTable { name, .. } => { + match verified_stmt(&sql) { + SQLStatement::SQLAlterTable { name, .. 
} => { assert_eq!(name, "public.customer"); } _ => assert!(false), @@ -331,7 +329,7 @@ Kwara & Kogi PHP ₱ USD $ \N Some other value \\."#); - let ast = parse_sql(&sql); + let ast = one_statement_parses_to(&sql, ""); println!("{:#?}", ast); //assert_eq!(sql, ast.to_string()); } @@ -339,7 +337,7 @@ PHP ₱ USD $ #[test] fn parse_timestamps_example() { let sql = "2016-02-15 09:43:33"; - let _ = parse_sql(sql); + let _ = parse_sql_expr(sql); //TODO add assertion //assert_eq!(sql, ast.to_string()); } @@ -347,7 +345,7 @@ fn parse_timestamps_example() { #[test] fn parse_timestamps_with_millis_example() { let sql = "2017-11-02 19:15:42.308637"; - let _ = parse_sql(sql); + let _ = parse_sql_expr(sql); //TODO add assertion //assert_eq!(sql, ast.to_string()); } @@ -355,24 +353,33 @@ fn parse_timestamps_with_millis_example() { #[test] fn parse_example_value() { let sql = "SARAH.LEWIS@sakilacustomer.org"; - let ast = parse_sql(sql); + let ast = parse_sql_expr(sql); assert_eq!(sql, ast.to_string()); } #[test] fn parse_function_now() { let sql = "now()"; - let ast = parse_sql(sql); + let ast = parse_sql_expr(sql); assert_eq!(sql, ast.to_string()); } -fn verified(query: &str) -> ASTNode { - let ast = parse_sql(query); - assert_eq!(query, &ast.to_string()); - ast +fn verified_stmt(query: &str) -> SQLStatement { + one_statement_parses_to(query, query) +} + +/// Ensures that `sql` parses as a single statement, optionally checking that +/// converting AST back to string equals to `canonical` (unless an empty string +/// is provided). 
+fn one_statement_parses_to(sql: &str, canonical: &str) -> SQLStatement { + let only_statement = Parser::parse_sql(&PostgreSqlDialect {}, sql.to_string()).unwrap(); + if !canonical.is_empty() { + assert_eq!(canonical, only_statement.to_string()) + } + only_statement } -fn parse_sql(sql: &str) -> ASTNode { +fn parse_sql_expr(sql: &str) -> ASTNode { debug!("sql: {}", sql); let mut parser = parser(sql); let ast = parser.parse().unwrap(); From 5a0e0ec928314827a8b5c1af519ed876420a03bd Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 3 Feb 2019 05:12:02 +0300 Subject: [PATCH 05/45] Simplify some tests by introducing `verified_select_stmt` and `expr_from_projection` (The primary motivation was that it makes the tests more resilient to the upcoming changes to the SQLSelectStatement to support `AS` aliases and `UNION`.) Also start using `&'static str` literals consistently instead of String::from for the `let sql` test strings. --- tests/sqlparser_generic.rs | 223 +++++++++++++++++-------------------- 1 file changed, 105 insertions(+), 118 deletions(-) diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 455c5a609..c9d9fca0d 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -104,28 +104,23 @@ fn parse_select_count_wildcard() { #[test] fn parse_not() { - let sql = String::from( - "SELECT id FROM customer \ - WHERE NOT salary = ''", - ); - let _ast = verified_stmt(&sql); + let sql = "SELECT id FROM customer WHERE NOT salary = ''"; + let _ast = verified_only_select(sql); //TODO: add assertions } #[test] fn parse_select_string_predicate() { - let sql = String::from( - "SELECT id, fname, lname FROM customer \ - WHERE salary != 'Not Provided' AND salary != ''", - ); - let _ast = verified_stmt(&sql); + let sql = "SELECT id, fname, lname FROM customer \ + WHERE salary != 'Not Provided' AND salary != ''"; + let _ast = verified_only_select(sql); //TODO: add assertions } #[test] fn parse_projection_nested_type() { - let sql = 
String::from("SELECT customer.address.state FROM foo"); - let _ast = verified_stmt(&sql); + let sql = "SELECT customer.address.state FROM foo"; + let _ast = verified_only_select(sql); //TODO: add assertions } @@ -133,7 +128,7 @@ fn parse_projection_nested_type() { fn parse_compound_expr_1() { use self::ASTNode::*; use self::SQLOperator::*; - let sql = String::from("a + b * c"); + let sql = "a + b * c"; assert_eq!( SQLBinaryExpr { left: Box::new(SQLIdentifier("a".to_string())), @@ -144,7 +139,7 @@ fn parse_compound_expr_1() { right: Box::new(SQLIdentifier("c".to_string())) }) }, - verified_expr(&sql) + verified_expr(sql) ); } @@ -152,7 +147,7 @@ fn parse_compound_expr_1() { fn parse_compound_expr_2() { use self::ASTNode::*; use self::SQLOperator::*; - let sql = String::from("a * b + c"); + let sql = "a * b + c"; assert_eq!( SQLBinaryExpr { left: Box::new(SQLBinaryExpr { @@ -163,27 +158,27 @@ fn parse_compound_expr_2() { op: Plus, right: Box::new(SQLIdentifier("c".to_string())) }, - verified_expr(&sql) + verified_expr(sql) ); } #[test] fn parse_is_null() { use self::ASTNode::*; - let sql = String::from("a IS NULL"); + let sql = "a IS NULL"; assert_eq!( SQLIsNull(Box::new(SQLIdentifier("a".to_string()))), - verified_expr(&sql) + verified_expr(sql) ); } #[test] fn parse_is_not_null() { use self::ASTNode::*; - let sql = String::from("a IS NOT NULL"); + let sql = "a IS NOT NULL"; assert_eq!( SQLIsNotNull(Box::new(SQLIdentifier("a".to_string()))), - verified_expr(&sql) + verified_expr(sql) ); } @@ -313,20 +308,15 @@ fn parse_limit_accepts_all() { #[test] fn parse_cast() { - let sql = String::from("SELECT CAST(id AS bigint) FROM customer"); - match verified_stmt(&sql) { - SQLStatement::SQLSelect(SQLSelect { projection, .. 
}) => { - assert_eq!(1, projection.len()); - assert_eq!( - ASTNode::SQLCast { - expr: Box::new(ASTNode::SQLIdentifier("id".to_string())), - data_type: SQLType::BigInt - }, - projection[0] - ); - } - _ => assert!(false), - } + let sql = "SELECT CAST(id AS bigint) FROM customer"; + let select = verified_only_select(sql); + assert_eq!( + &ASTNode::SQLCast { + expr: Box::new(ASTNode::SQLIdentifier("id".to_string())), + data_type: SQLType::BigInt + }, + expr_from_projection(only(&select.projection)) + ); one_statement_parses_to( "SELECT CAST(id AS BIGINT) FROM customer", "SELECT CAST(id AS bigint) FROM customer", @@ -374,66 +364,55 @@ fn parse_create_table() { #[test] fn parse_scalar_function_in_projection() { - let sql = String::from("SELECT sqrt(id) FROM foo"); - match verified_stmt(&sql) { - SQLStatement::SQLSelect(SQLSelect { projection, .. }) => { - assert_eq!( - vec![ASTNode::SQLFunction { - id: String::from("sqrt"), - args: vec![ASTNode::SQLIdentifier(String::from("id"))], - }], - projection - ); - } - _ => assert!(false), - } + let sql = "SELECT sqrt(id) FROM foo"; + let select = verified_only_select(sql); + assert_eq!( + &ASTNode::SQLFunction { + id: String::from("sqrt"), + args: vec![ASTNode::SQLIdentifier(String::from("id"))], + }, + expr_from_projection(only(&select.projection)) + ); } #[test] fn parse_aggregate_with_group_by() { - let sql = String::from("SELECT a, COUNT(1), MIN(b), MAX(b) FROM foo GROUP BY a"); - let _ast = verified_stmt(&sql); + let sql = "SELECT a, COUNT(1), MIN(b), MAX(b) FROM foo GROUP BY a"; + let _ast = verified_only_select(sql); //TODO: assertions } #[test] fn parse_literal_string() { let sql = "SELECT 'one'"; - match verified_stmt(&sql) { - SQLStatement::SQLSelect(SQLSelect { ref projection, .. 
}) => { - assert_eq!( - projection[0], - ASTNode::SQLValue(Value::SingleQuotedString("one".to_string())) - ); - } - _ => panic!(), - } + let select = verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + &ASTNode::SQLValue(Value::SingleQuotedString("one".to_string())), + expr_from_projection(&select.projection[0]) + ); } #[test] fn parse_simple_math_expr_plus() { let sql = "SELECT a + b, 2 + a, 2.5 + a, a_f + b_f, 2 + a_f, 2.5 + a_f FROM c"; - verified_stmt(&sql); + verified_only_select(sql); } #[test] fn parse_simple_math_expr_minus() { let sql = "SELECT a - b, 2 - a, 2.5 - a, a_f - b_f, 2 - a_f, 2.5 - a_f FROM c"; - verified_stmt(&sql); + verified_only_select(sql); } #[test] fn parse_select_version() { let sql = "SELECT @@version"; - match verified_stmt(&sql) { - SQLStatement::SQLSelect(SQLSelect { ref projection, .. }) => { - assert_eq!( - projection[0], - ASTNode::SQLIdentifier("@@version".to_string()) - ); - } - _ => panic!(), - } + let select = verified_only_select(sql); + assert_eq!( + &ASTNode::SQLIdentifier("@@version".to_string()), + expr_from_projection(only(&select.projection)), + ); } #[test] @@ -465,38 +444,33 @@ fn parse_case_expression() { let sql = "SELECT CASE WHEN bar IS NULL THEN 'null' WHEN bar = 0 THEN '=0' WHEN bar >= 0 THEN '>=0' ELSE '<0' END FROM foo"; use self::ASTNode::{SQLBinaryExpr, SQLCase, SQLIdentifier, SQLIsNull, SQLValue}; use self::SQLOperator::*; - match verified_stmt(&sql) { - SQLStatement::SQLSelect(SQLSelect { projection, .. 
}) => { - assert_eq!(1, projection.len()); - assert_eq!( - SQLCase { - conditions: vec![ - SQLIsNull(Box::new(SQLIdentifier("bar".to_string()))), - SQLBinaryExpr { - left: Box::new(SQLIdentifier("bar".to_string())), - op: Eq, - right: Box::new(SQLValue(Value::Long(0))) - }, - SQLBinaryExpr { - left: Box::new(SQLIdentifier("bar".to_string())), - op: GtEq, - right: Box::new(SQLValue(Value::Long(0))) - } - ], - results: vec![ - SQLValue(Value::SingleQuotedString("null".to_string())), - SQLValue(Value::SingleQuotedString("=0".to_string())), - SQLValue(Value::SingleQuotedString(">=0".to_string())) - ], - else_result: Some(Box::new(SQLValue(Value::SingleQuotedString( - "<0".to_string() - )))) + let select = verified_only_select(sql); + assert_eq!( + &SQLCase { + conditions: vec![ + SQLIsNull(Box::new(SQLIdentifier("bar".to_string()))), + SQLBinaryExpr { + left: Box::new(SQLIdentifier("bar".to_string())), + op: Eq, + right: Box::new(SQLValue(Value::Long(0))) }, - projection[0] - ); - } - _ => assert!(false), - } + SQLBinaryExpr { + left: Box::new(SQLIdentifier("bar".to_string())), + op: GtEq, + right: Box::new(SQLValue(Value::Long(0))) + } + ], + results: vec![ + SQLValue(Value::SingleQuotedString("null".to_string())), + SQLValue(Value::SingleQuotedString("=0".to_string())), + SQLValue(Value::SingleQuotedString(">=0".to_string())) + ], + else_result: Some(Box::new(SQLValue(Value::SingleQuotedString( + "<0".to_string() + )))) + }, + expr_from_projection(only(&select.projection)), + ); } #[test] @@ -592,7 +566,7 @@ fn parse_joins_on() { } // Test parsing of aliases assert_eq!( - joins_from(verified_stmt("SELECT * FROM t1 JOIN t2 AS foo ON c1 = c2")), + verified_only_select("SELECT * FROM t1 JOIN t2 AS foo ON c1 = c2").joins, vec![join_with_constraint( "t2", Some("foo".to_string()), @@ -605,19 +579,19 @@ fn parse_joins_on() { ); // Test parsing of different join operators assert_eq!( - joins_from(verified_stmt("SELECT * FROM t1 JOIN t2 ON c1 = c2")), + 
verified_only_select("SELECT * FROM t1 JOIN t2 ON c1 = c2").joins, vec![join_with_constraint("t2", None, JoinOperator::Inner)] ); assert_eq!( - joins_from(verified_stmt("SELECT * FROM t1 LEFT JOIN t2 ON c1 = c2")), + verified_only_select("SELECT * FROM t1 LEFT JOIN t2 ON c1 = c2").joins, vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)] ); assert_eq!( - joins_from(verified_stmt("SELECT * FROM t1 RIGHT JOIN t2 ON c1 = c2")), + verified_only_select("SELECT * FROM t1 RIGHT JOIN t2 ON c1 = c2").joins, vec![join_with_constraint("t2", None, JoinOperator::RightOuter)] ); assert_eq!( - joins_from(verified_stmt("SELECT * FROM t1 FULL JOIN t2 ON c1 = c2")), + verified_only_select("SELECT * FROM t1 FULL JOIN t2 ON c1 = c2").joins, vec![join_with_constraint("t2", None, JoinOperator::FullOuter)] ); } @@ -639,7 +613,7 @@ fn parse_joins_using() { } // Test parsing of aliases assert_eq!( - joins_from(verified_stmt("SELECT * FROM t1 JOIN t2 AS foo USING(c1)")), + verified_only_select("SELECT * FROM t1 JOIN t2 AS foo USING(c1)").joins, vec![join_with_constraint( "t2", Some("foo".to_string()), @@ -652,19 +626,19 @@ fn parse_joins_using() { ); // Test parsing of different join operators assert_eq!( - joins_from(verified_stmt("SELECT * FROM t1 JOIN t2 USING(c1)")), + verified_only_select("SELECT * FROM t1 JOIN t2 USING(c1)").joins, vec![join_with_constraint("t2", None, JoinOperator::Inner)] ); assert_eq!( - joins_from(verified_stmt("SELECT * FROM t1 LEFT JOIN t2 USING(c1)")), + verified_only_select("SELECT * FROM t1 LEFT JOIN t2 USING(c1)").joins, vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)] ); assert_eq!( - joins_from(verified_stmt("SELECT * FROM t1 RIGHT JOIN t2 USING(c1)")), + verified_only_select("SELECT * FROM t1 RIGHT JOIN t2 USING(c1)").joins, vec![join_with_constraint("t2", None, JoinOperator::RightOuter)] ); assert_eq!( - joins_from(verified_stmt("SELECT * FROM t1 FULL JOIN t2 USING(c1)")), + verified_only_select("SELECT * FROM t1 FULL JOIN t2 
USING(c1)").joins, vec![join_with_constraint("t2", None, JoinOperator::FullOuter)] ); } @@ -672,7 +646,7 @@ fn parse_joins_using() { #[test] fn parse_complex_join() { let sql = "SELECT c1, c2 FROM t1, t4 JOIN t2 ON t2.c = t1.c LEFT JOIN t3 USING(q, c) WHERE t4.c = t1.c"; - verified_stmt(sql); + verified_only_select(sql); } #[test] @@ -695,6 +669,26 @@ fn parse_join_syntax_variants() { ); } +fn only<'a, T>(v: &'a Vec) -> &'a T { + assert_eq!(1, v.len()); + v.first().unwrap() +} + +fn verified_query(query: &str) -> SQLSelect { + match verified_stmt(query) { + SQLStatement::SQLSelect(select) => select, + _ => panic!("Expected SELECT"), + } +} + +fn expr_from_projection(item: &ASTNode) -> &ASTNode { + item // Will be changed later to extract expression from `expr AS alias` struct +} + +fn verified_only_select(query: &str) -> SQLSelect { + verified_query(query) +} + fn verified_stmt(query: &str) -> SQLStatement { one_statement_parses_to(query, query) } @@ -705,13 +699,6 @@ fn verified_expr(query: &str) -> ASTNode { ast } -fn joins_from(ast: SQLStatement) -> Vec { - match ast { - SQLStatement::SQLSelect(SQLSelect { joins, .. }) => joins, - _ => panic!("Expected SELECT"), - } -} - /// Ensures that `sql` parses as a statement, optionally checking that /// converting AST back to string equals to `canonical` (unless an empty string /// is provided). From 707c58ad57baac0c4323886c92f0964fca00bc7d Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Wed, 30 Jan 2019 00:58:55 +0300 Subject: [PATCH 06/45] Support parsing of multiple statements (5/5) Parser::parse_sql() can now parse a semicolon-separated list of statements, returning them in a Vec. To support this we: - Move handling of inter-statement tokens from the end of individual statement parsers (`parse_select` and `parse_delete`; this was not implemented for other top-level statements) to the common statement-list parsing code (`parse_sql`); - Change the "Unexpected token at end of ..." 
error, which didn't have tests and prevented us from parsing successive statements -> "Expected end of statement" (i.e. a delimiter - currently only ";" - or the EOF); - Add PartialEq on ParserError to be able to assert_eq!() that parsing statements that do not terminate properly returns an expected error. --- src/sqlparser.rs | 77 +++++++++++++++++++------------------ tests/sqlparser_generic.rs | 76 ++++++++++++++++++++---------------- tests/sqlparser_postgres.rs | 9 ++++- 3 files changed, 91 insertions(+), 71 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index d153c2657..ad1e98af2 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -20,7 +20,7 @@ use super::sqlast::*; use super::sqltokenizer::*; use chrono::{offset::FixedOffset, DateTime, NaiveDate, NaiveDateTime, NaiveTime}; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub enum ParserError { TokenizerError(String), ParserError(String), @@ -54,14 +54,36 @@ impl Parser { } /// Parse a SQL statement and produce an Abstract Syntax Tree (AST) - pub fn parse_sql(dialect: &Dialect, sql: String) -> Result { + pub fn parse_sql(dialect: &Dialect, sql: String) -> Result, ParserError> { let mut tokenizer = Tokenizer::new(dialect, &sql); let tokens = tokenizer.tokenize()?; let mut parser = Parser::new(tokens); - parser.parse_statement() + let mut stmts = Vec::new(); + let mut expecting_statement_delimiter = false; + loop { + // ignore empty statements (between successive statement delimiters) + while parser.consume_token(&Token::SemiColon) { + expecting_statement_delimiter = false; + } + + if parser.peek_token().is_none() { + break; + } else if expecting_statement_delimiter { + return parser_err!(format!( + "Expected end of statement, found: {}", + parser.peek_token().unwrap().to_string() + )); + } + + let statement = parser.parse_statement()?; + stmts.push(statement); + expecting_statement_delimiter = true; + } + Ok(stmts) } - /// Parse a single top-level statement (such as SELECT, 
INSERT, CREATE, etc.) + /// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.), + /// stopping before the statement separator, if any. pub fn parse_statement(&mut self) -> Result { match self.next_token() { Some(t) => match t { @@ -1095,20 +1117,10 @@ impl Parser { None }; - let _ = self.consume_token(&Token::SemiColon); - - // parse next token - if let Some(next_token) = self.peek_token() { - parser_err!(format!( - "Unexpected token at end of DELETE: {:?}", - next_token - )) - } else { - Ok(SQLStatement::SQLDelete { - relation, - selection, - }) - } + Ok(SQLStatement::SQLDelete { + relation, + selection, + }) } /// Parse a SELECT statement @@ -1154,25 +1166,16 @@ impl Parser { None }; - let _ = self.consume_token(&Token::SemiColon); - - if let Some(next_token) = self.peek_token() { - parser_err!(format!( - "Unexpected token at end of SELECT: {:?}", - next_token - )) - } else { - Ok(SQLSelect { - projection, - selection, - relation, - joins, - limit, - order_by, - group_by, - having, - }) - } + Ok(SQLSelect { + projection, + selection, + relation, + joins, + limit, + order_by, + group_by, + having, + }) } /// A table name or a parenthesized subquery, followed by optional `[AS] alias` diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index c9d9fca0d..6f0e1c64d 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -473,34 +473,6 @@ fn parse_case_expression() { ); } -#[test] -fn parse_select_with_semi_colon() { - let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1;"); - match one_statement_parses_to(&sql, "") { - SQLStatement::SQLSelect(SQLSelect { projection, .. }) => { - assert_eq!(3, projection.len()); - } - _ => assert!(false), - } -} - -#[test] -fn parse_delete_with_semi_colon() { - let sql: &str = "DELETE FROM 'table';"; - - match one_statement_parses_to(&sql, "") { - SQLStatement::SQLDelete { relation, .. 
} => { - assert_eq!( - Some(Box::new(ASTNode::SQLValue(Value::SingleQuotedString( - "table".to_string() - )))), - relation - ); - } - _ => assert!(false), - } -} - #[test] fn parse_implicit_join() { let sql = "SELECT * FROM t1, t2"; @@ -669,6 +641,37 @@ fn parse_join_syntax_variants() { ); } +#[test] +fn parse_multiple_statements() { + fn test_with(sql1: &str, sql2_kw: &str, sql2_rest: &str) { + // Check that a string consisting of two statements delimited by a semicolon + // parses the same as both statements individually: + let res = parse_sql_statements(&(sql1.to_owned() + ";" + sql2_kw + sql2_rest)); + assert_eq!( + vec![ + one_statement_parses_to(&sql1, ""), + one_statement_parses_to(&(sql2_kw.to_owned() + sql2_rest), ""), + ], + res.unwrap() + ); + // Check that extra semicolon at the end is stripped by normalization: + one_statement_parses_to(&(sql1.to_owned() + ";"), sql1); + // Check that forgetting the semicolon results in an error: + let res = parse_sql_statements(&(sql1.to_owned() + " " + sql2_kw + sql2_rest)); + assert_eq!( + ParserError::ParserError("Expected end of statement, found: ".to_string() + sql2_kw), + res.unwrap_err() + ); + } + test_with("SELECT foo", "SELECT", " bar"); + test_with("DELETE FROM foo", "SELECT", " bar"); + test_with("INSERT INTO foo VALUES(1)", "SELECT", " bar"); + test_with("CREATE TABLE foo (baz int)", "SELECT", " bar"); + // Make sure that empty statements do not cause an error: + let res = parse_sql_statements(";;"); + assert_eq!(0, res.unwrap().len()); +} + fn only<'a, T>(v: &'a Vec) -> &'a T { assert_eq!(1, v.len()); v.first().unwrap() @@ -699,17 +702,24 @@ fn verified_expr(query: &str) -> ASTNode { ast } -/// Ensures that `sql` parses as a statement, optionally checking that +/// Ensures that `sql` parses as a single statement, optionally checking that /// converting AST back to string equals to `canonical` (unless an empty string /// is provided). 
fn one_statement_parses_to(sql: &str, canonical: &str) -> SQLStatement { - let generic_ast = Parser::parse_sql(&GenericSqlDialect {}, sql.to_string()).unwrap(); - let pg_ast = Parser::parse_sql(&PostgreSqlDialect {}, sql.to_string()).unwrap(); - assert_eq!(generic_ast, pg_ast); + let mut statements = parse_sql_statements(&sql).unwrap(); + assert_eq!(statements.len(), 1); + let only_statement = statements.pop().unwrap(); if !canonical.is_empty() { - assert_eq!(canonical, generic_ast.to_string()) + assert_eq!(canonical, only_statement.to_string()) } + only_statement +} + +fn parse_sql_statements(sql: &str) -> Result, ParserError> { + let generic_ast = Parser::parse_sql(&GenericSqlDialect {}, sql.to_string()); + let pg_ast = Parser::parse_sql(&PostgreSqlDialect {}, sql.to_string()); + assert_eq!(generic_ast, pg_ast); generic_ast } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 37215a9bc..1cff98284 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -372,13 +372,20 @@ fn verified_stmt(query: &str) -> SQLStatement { /// converting AST back to string equals to `canonical` (unless an empty string /// is provided). 
fn one_statement_parses_to(sql: &str, canonical: &str) -> SQLStatement { - let only_statement = Parser::parse_sql(&PostgreSqlDialect {}, sql.to_string()).unwrap(); + let mut statements = parse_sql_statements(&sql).unwrap(); + assert_eq!(statements.len(), 1); + + let only_statement = statements.pop().unwrap(); if !canonical.is_empty() { assert_eq!(canonical, only_statement.to_string()) } only_statement } +fn parse_sql_statements(sql: &str) -> Result, ParserError> { + Parser::parse_sql(&PostgreSqlDialect {}, sql.to_string()) +} + fn parse_sql_expr(sql: &str) -> ASTNode { debug!("sql: {}", sql); let mut parser = parser(sql); From b57c60a78c894c4f5d6d3621682aaaef25ef23de Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Tue, 29 Jan 2019 18:34:34 +0300 Subject: [PATCH 07/45] Only use parse_expr() when we expect an expression (0/4) Before this commit there was a single `parse_expr(u8)` method, which was called both 1) from within the expression parser (to parse a subexpression consisting of operators with higher priority than the current one), and 2) from the top-down parser both a) to parse true expressions (such as an item of the SELECT list or the condition after WHERE or after ON), and b) to parse sequences which are not exactly "expressions". This starts cleaning this up by renaming the `parse_expr(u8)` method to `parse_subexpr()` and using it only for (1) - i.e. usually providing a non-zero precedence parameter. The non-intuitively named `parse()` method is renamed to `parse_expr()` (a name that has now become available) and is used for (2a). While reviewing the existing callers of `parse_expr`, four points to follow up on were identified (marked "TBD (#)" in the commit): 1) Do not lose parens (e.g. `(1+2)*3`) when roundtripping String->AST->String by using SQLNested. 2) Incorrect precedence of the unary NOT operator. 3) `parse_table_factor` accepts any expression where a SELECT subquery is expected. 
4) parse_delete uses parse_expr() to retrieve a table name These are dealt with in the commits to follow. --- src/sqlparser.rs | 40 ++++++++++++++++++------------------- tests/sqlparser_ansi.rs | 2 +- tests/sqlparser_generic.rs | 2 +- tests/sqlparser_postgres.rs | 2 +- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index ad1e98af2..adf536050 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -109,12 +109,12 @@ impl Parser { } /// Parse a new expression - pub fn parse(&mut self) -> Result { - self.parse_expr(0) + pub fn parse_expr(&mut self) -> Result { + self.parse_subexpr(0) } /// Parse tokens until the precedence changes - pub fn parse_expr(&mut self, precedence: u8) -> Result { + pub fn parse_subexpr(&mut self, precedence: u8) -> Result { debug!("parsing expr"); let mut expr = self.parse_prefix()?; debug!("prefix: {:?}", expr); @@ -167,7 +167,7 @@ impl Parser { "CAST" => self.parse_cast_expression(), "NOT" => Ok(ASTNode::SQLUnary { operator: SQLOperator::Not, - expr: Box::new(self.parse_expr(0)?), + expr: Box::new(self.parse_subexpr(0)?), // TBD (2) }), _ => match self.peek_token() { Some(Token::LParen) => self.parse_function(&w.value), @@ -194,7 +194,7 @@ impl Parser { self.parse_sql_value() } Token::LParen => { - let expr = self.parse(); + let expr = self.parse_expr(); // TBD (1) self.expect_token(&Token::RParen)?; expr } @@ -230,11 +230,11 @@ impl Parser { let mut results = vec![]; let mut else_result = None; loop { - conditions.push(self.parse_expr(0)?); + conditions.push(self.parse_expr()?); self.expect_keyword("THEN")?; - results.push(self.parse_expr(0)?); + results.push(self.parse_expr()?); if self.parse_keywords(vec!["ELSE"]) { - else_result = Some(Box::new(self.parse_expr(0)?)); + else_result = Some(Box::new(self.parse_expr()?)); if self.parse_keywords(vec!["END"]) { break; } else { @@ -261,7 +261,7 @@ impl Parser { /// Parse a SQL CAST function e.g. 
`CAST(expr AS FLOAT)` pub fn parse_cast_expression(&mut self) -> Result { self.expect_token(&Token::LParen)?; - let expr = self.parse_expr(0)?; + let expr = self.parse_expr()?; self.expect_keyword("AS")?; let data_type = self.parse_data_type()?; self.expect_token(&Token::RParen)?; @@ -298,7 +298,7 @@ impl Parser { Ok(ASTNode::SQLBinaryExpr { left: Box::new(expr), op: SQLOperator::NotLike, - right: Box::new(self.parse_expr(precedence)?), + right: Box::new(self.parse_subexpr(precedence)?), }) } else { parser_err!("Invalid tokens after NOT") @@ -322,12 +322,12 @@ impl Parser { | Token::Div => Ok(ASTNode::SQLBinaryExpr { left: Box::new(expr), op: self.to_sql_operator(&tok)?, - right: Box::new(self.parse_expr(precedence)?), + right: Box::new(self.parse_subexpr(precedence)?), }), _ => parser_err!(format!("No infix parser for token {:?}", tok)), }, // This is not supposed to happen, because of the precedence check - // in parse_expr. + // in parse_subexpr. None => parser_err!("Unexpected EOF in parse_infix"), } } @@ -1106,13 +1106,13 @@ impl Parser { pub fn parse_delete(&mut self) -> Result { let relation: Option> = if self.parse_keyword("FROM") { - Some(Box::new(self.parse_expr(0)?)) + Some(Box::new(self.parse_subexpr(0)?)) /* TBD (4) */ } else { None }; let selection = if self.parse_keyword("WHERE") { - Some(Box::new(self.parse_expr(0)?)) + Some(Box::new(self.parse_expr()?)) } else { None }; @@ -1136,7 +1136,7 @@ impl Parser { }; let selection = if self.parse_keyword("WHERE") { - let expr = self.parse_expr(0)?; + let expr = self.parse_expr()?; Some(Box::new(expr)) } else { None @@ -1149,7 +1149,7 @@ impl Parser { }; let having = if self.parse_keyword("HAVING") { - Some(Box::new(self.parse_expr(0)?)) + Some(Box::new(self.parse_expr()?)) } else { None }; @@ -1182,7 +1182,7 @@ impl Parser { pub fn parse_table_factor(&mut self) -> Result { let relation = if self.consume_token(&Token::LParen) { self.prev_token(); - self.parse_expr(0)? + self.parse_subexpr(0)? 
/* TBD (3) */ } else { self.parse_compound_identifier(&Token::Period)? }; @@ -1197,7 +1197,7 @@ impl Parser { if natural { Ok(JoinConstraint::Natural) } else if self.parse_keyword("ON") { - let constraint = self.parse_expr(0)?; + let constraint = self.parse_expr()?; Ok(JoinConstraint::On(constraint)) } else if self.parse_keyword("USING") { self.expect_token(&Token::LParen)?; @@ -1338,7 +1338,7 @@ impl Parser { pub fn parse_expr_list(&mut self) -> Result, ParserError> { let mut expr_list: Vec = vec![]; loop { - expr_list.push(self.parse_expr(0)?); + expr_list.push(self.parse_expr()?); if let Some(t) = self.peek_token() { if t == Token::Comma { self.next_token(); @@ -1357,7 +1357,7 @@ impl Parser { pub fn parse_order_by_expr_list(&mut self) -> Result, ParserError> { let mut expr_list: Vec = vec![]; loop { - let expr = self.parse_expr(0)?; + let expr = self.parse_expr()?; let asc = if self.parse_keyword("ASC") { Some(true) diff --git a/tests/sqlparser_ansi.rs b/tests/sqlparser_ansi.rs index b22482ae1..d55d17a03 100644 --- a/tests/sqlparser_ansi.rs +++ b/tests/sqlparser_ansi.rs @@ -23,6 +23,6 @@ fn parse_sql_expr(sql: &str) -> ASTNode { let mut tokenizer = Tokenizer::new(&dialect, &sql); let tokens = tokenizer.tokenize().unwrap(); let mut parser = Parser::new(tokens); - let ast = parser.parse().unwrap(); + let ast = parser.parse_expr().unwrap(); ast } diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 6f0e1c64d..afca54e7c 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -734,6 +734,6 @@ fn parse_sql_expr_with(dialect: &Dialect, sql: &str) -> ASTNode { let mut tokenizer = Tokenizer::new(dialect, &sql); let tokens = tokenizer.tokenize().unwrap(); let mut parser = Parser::new(tokens); - let ast = parser.parse().unwrap(); + let ast = parser.parse_expr().unwrap(); ast } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 1cff98284..ff3b2c30e 100644 --- a/tests/sqlparser_postgres.rs +++ 
b/tests/sqlparser_postgres.rs @@ -389,7 +389,7 @@ fn parse_sql_statements(sql: &str) -> Result, ParserError> { fn parse_sql_expr(sql: &str) -> ASTNode { debug!("sql: {}", sql); let mut parser = parser(sql); - let ast = parser.parse().unwrap(); + let ast = parser.parse_expr().unwrap(); ast } From 29db6197927d4e5955cb442a595e1c7bde301f6b Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 31 Jan 2019 00:10:21 +0300 Subject: [PATCH 08/45] Stop losing parens when roundtripping (1/4) Before this change an expression like `(a+b)-(c+d)` was parsed correctly (as a Minus node with two Plus nodes as children), but when serializing back to an SQL string, it came up as a+b-c+d, since we don't store parens in AST and don't attempt to insert them when necessary during serialization. The latter would be hard, and we already had an SQLNested enum variant, so I changed the code to wrap the AST node for the parenthesized expression in it. --- src/sqlparser.rs | 4 ++-- tests/sqlparser_generic.rs | 11 +++++------ 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index adf536050..057eb34bc 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -194,9 +194,9 @@ impl Parser { self.parse_sql_value() } Token::LParen => { - let expr = self.parse_expr(); // TBD (1) + let expr = self.parse_expr()?; self.expect_token(&Token::RParen)?; - expr + Ok(ASTNode::SQLNested(Box::new(expr))) } _ => parser_err!(format!( "Prefix parser expected a keyword but found {:?}", diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index afca54e7c..b2d48f56a 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -420,22 +420,21 @@ fn parse_parens() { use self::ASTNode::*; use self::SQLOperator::*; let sql = "(a + b) - (c + d)"; - let ast = parse_sql_expr(&sql); assert_eq!( SQLBinaryExpr { - left: Box::new(SQLBinaryExpr { + left: Box::new(SQLNested(Box::new(SQLBinaryExpr { left: Box::new(SQLIdentifier("a".to_string())), 
op: Plus, right: Box::new(SQLIdentifier("b".to_string())) - }), + }))), op: Minus, - right: Box::new(SQLBinaryExpr { + right: Box::new(SQLNested(Box::new(SQLBinaryExpr { left: Box::new(SQLIdentifier("c".to_string())), op: Plus, right: Box::new(SQLIdentifier("d".to_string())) - }) + }))) }, - ast + verified_expr(sql) ); } From 82dc581639c069b9217f7a1efe2e5241846306d5 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 31 Jan 2019 00:10:26 +0300 Subject: [PATCH 09/45] Fix precedence for the NOT operator (2/4) I checked the docs of a few of the most popular RDBMSes, and it seems there's consensus that the precedence of `NOT` is higher than `AND`, but lower than `IS NULL`. Postgresql[1], Oracle[2] and MySQL[3] docs say that explicitly. T-SQL docs[4] do mention it's higher than `AND`, and while they don't explicitly mention IS NULL, this snippet: select * from (select 1 as a)x where (not x.a) is null ...is a parsing error, while the following works like IS NOT NULL: select * from (select 1 as a)x where not x.a is null sqlite doesn't seem to mention `NOT` precedence, but I assume it works similarly. 
[1] https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-OPERATORS [2] https://docs.oracle.com/cd/B19306_01/server.102/b14200/conditions001.htm#i1034834 [3] https://dev.mysql.com/doc/refman/8.0/en/operator-precedence.html [4] https://docs.microsoft.com/en-us/sql/t-sql/language-elements/operator-precedence-transact-sql?view=sql-server-2017 --- src/sqlparser.rs | 13 ++++++++----- tests/sqlparser_generic.rs | 24 ++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 057eb34bc..dcfdf1348 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -165,10 +165,13 @@ impl Parser { } "CASE" => self.parse_case_expression(), "CAST" => self.parse_cast_expression(), - "NOT" => Ok(ASTNode::SQLUnary { - operator: SQLOperator::Not, - expr: Box::new(self.parse_subexpr(0)?), // TBD (2) - }), + "NOT" => { + let p = self.get_precedence(&Token::make_keyword("NOT"))?; + Ok(ASTNode::SQLUnary { + operator: SQLOperator::Not, + expr: Box::new(self.parse_subexpr(p)?), + }) + } _ => match self.peek_token() { Some(Token::LParen) => self.parse_function(&w.value), Some(Token::Period) => { @@ -371,7 +374,7 @@ impl Parser { &Token::SQLWord(ref k) if k.keyword == "OR" => Ok(5), &Token::SQLWord(ref k) if k.keyword == "AND" => Ok(10), &Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(15), - &Token::SQLWord(ref k) if k.keyword == "IS" => Ok(15), + &Token::SQLWord(ref k) if k.keyword == "IS" => Ok(17), &Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(20), &Token::Eq | &Token::Lt | &Token::LtEq | &Token::Neq | &Token::Gt | &Token::GtEq => { Ok(20) diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index b2d48f56a..be0f52cec 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -182,6 +182,30 @@ fn parse_is_not_null() { ); } +#[test] +fn parse_not_precedence() { + use self::ASTNode::*; + // NOT has higher precedence than OR/AND, so the following must parse as (NOT 
true) OR true + let sql = "NOT true OR true"; + match verified_expr(sql) { + SQLBinaryExpr { + op: SQLOperator::Or, + .. + } => assert!(true), + _ => assert!(false), + }; + + // But NOT has lower precedence than comparison operators, so the following parses as NOT (a IS NULL) + let sql = "NOT a IS NULL"; + match verified_expr(sql) { + SQLUnary { + operator: SQLOperator::Not, + .. + } => assert!(true), + _ => assert!(false), + }; +} + #[test] fn parse_like() { let sql = String::from("SELECT * FROM customers WHERE name LIKE '%a'"); From 215820ef66e0e6c66c808daf9e5d4f03890e69e6 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 31 Jan 2019 00:10:10 +0300 Subject: [PATCH 10/45] Stricter parsing for subqueries (3/4) This makes the parser more strict when handling SELECTs nested somewhere in the main statement: 1) instead of accepting SELECT anywhere in the expression where an operand was expected, we only accept it inside parens. (I've added a test for the currently supported syntax - a scalar subquery, in ANSI SQL terms) 2) instead of accepting any expression in the derived table context: `FROM ( ... )` - we only look for a SELECT subquery there. Due to #1, I had to switch the 'ansi' test from invoking the expression parser to the statement parser. 
--- src/sqlast/mod.rs | 7 ++++--- src/sqlparser.rs | 15 ++++++++++----- tests/sqlparser_ansi.rs | 17 ++++------------- tests/sqlparser_generic.rs | 30 ++++++++++++++++++++++++++++++ 4 files changed, 48 insertions(+), 21 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 811697ebf..f6a45a845 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -77,8 +77,9 @@ pub enum ASTNode { relation: Box, // SQLNested or SQLCompoundIdentifier alias: Option, }, - /// SELECT - SQLSelect(SQLSelect), + /// A parenthesized subquery `(SELECT ...)`, used in expression like + /// `SELECT (subquery) AS x` or `WHERE (subquery) = x` + SQLSubquery(SQLSelect), } impl ToString for ASTNode { @@ -139,7 +140,7 @@ impl ToString for ASTNode { relation.to_string() } } - ASTNode::SQLSelect(s) => s.to_string(), + ASTNode::SQLSubquery(s) => format!("({})", s.to_string()), } } } diff --git a/src/sqlparser.rs b/src/sqlparser.rs index dcfdf1348..a873dc226 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -158,7 +158,6 @@ impl Parser { match self.next_token() { Some(t) => match t { Token::SQLWord(w) => match w.keyword.as_ref() { - "SELECT" => Ok(ASTNode::SQLSelect(self.parse_select()?)), "TRUE" | "FALSE" | "NULL" => { self.prev_token(); self.parse_sql_value() @@ -197,9 +196,13 @@ impl Parser { self.parse_sql_value() } Token::LParen => { - let expr = self.parse_expr()?; + let expr = if self.parse_keyword("SELECT") { + ASTNode::SQLSubquery(self.parse_select()?) + } else { + ASTNode::SQLNested(Box::new(self.parse_expr()?)) + }; self.expect_token(&Token::RParen)?; - Ok(ASTNode::SQLNested(Box::new(expr))) + Ok(expr) } _ => parser_err!(format!( "Prefix parser expected a keyword but found {:?}", @@ -1184,8 +1187,10 @@ impl Parser { /// A table name or a parenthesized subquery, followed by optional `[AS] alias` pub fn parse_table_factor(&mut self) -> Result { let relation = if self.consume_token(&Token::LParen) { - self.prev_token(); - self.parse_subexpr(0)? 
/* TBD (3) */ + self.expect_keyword("SELECT")?; + let subquery = self.parse_select()?; + self.expect_token(&Token::RParen)?; + ASTNode::SQLSubquery(subquery) } else { self.parse_compound_identifier(&Token::Period)? }; diff --git a/tests/sqlparser_ansi.rs b/tests/sqlparser_ansi.rs index d55d17a03..7e4900f5c 100644 --- a/tests/sqlparser_ansi.rs +++ b/tests/sqlparser_ansi.rs @@ -4,25 +4,16 @@ extern crate sqlparser; use sqlparser::dialect::AnsiSqlDialect; use sqlparser::sqlast::*; use sqlparser::sqlparser::*; -use sqlparser::sqltokenizer::*; #[test] fn parse_simple_select() { let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1"); - let ast = parse_sql_expr(&sql); - match ast { - ASTNode::SQLSelect(SQLSelect { projection, .. }) => { + let ast = Parser::parse_sql(&AnsiSqlDialect {}, sql).unwrap(); + assert_eq!(1, ast.len()); + match ast.first().unwrap() { + SQLStatement::SQLSelect(SQLSelect { projection, .. }) => { assert_eq!(3, projection.len()); } _ => assert!(false), } } - -fn parse_sql_expr(sql: &str) -> ASTNode { - let dialect = AnsiSqlDialect {}; - let mut tokenizer = Tokenizer::new(&dialect, &sql); - let tokens = tokenizer.tokenize().unwrap(); - let mut parser = Parser::new(tokens); - let ast = parser.parse_expr().unwrap(); - ast -} diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index be0f52cec..70d813e4a 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -664,6 +664,13 @@ fn parse_join_syntax_variants() { ); } +#[test] +fn parse_derived_tables() { + let sql = "SELECT a.x, b.y FROM (SELECT x FROM foo) AS a CROSS JOIN (SELECT y FROM bar) AS b"; + let _ = verified_only_select(sql); + //TODO: add assertions +} + #[test] fn parse_multiple_statements() { fn test_with(sql1: &str, sql2_kw: &str, sql2_rest: &str) { @@ -695,6 +702,29 @@ fn parse_multiple_statements() { assert_eq!(0, res.unwrap().len()); } +#[test] +fn parse_scalar_subqueries() { + use self::ASTNode::*; + let sql = "(SELECT 1) + (SELECT 
2)"; + match verified_expr(sql) { + SQLBinaryExpr { + op: SQLOperator::Plus, .. + //left: box SQLSubquery { .. }, + //right: box SQLSubquery { .. }, + } => assert!(true), + _ => assert!(false), + }; +} + +#[test] +fn parse_invalid_subquery_without_parens() { + let res = parse_sql_statements("SELECT SELECT 1 FROM bar WHERE 1=1 FROM baz"); + assert_eq!( + ParserError::ParserError("Expected end of statement, found: 1".to_string()), + res.unwrap_err() + ); +} + fn only<'a, T>(v: &'a Vec) -> &'a T { assert_eq!(1, v.len()); v.first().unwrap() From 523f086be7913e88f4ab841bbbd9b45adf171f59 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Wed, 30 Jan 2019 21:16:31 +0300 Subject: [PATCH 11/45] Introduce SQLObjectName struct (4.1/4.4) (To store "A name of a table, view, custom type, etc., possibly multi-part, i.e. db.schema.obj".) Before this change - some places used `String` for this (these are updated in this commit) - while others (notably SQLStatement::SQLDelete::relation, which is the reason for this series of commits) relied on ASTNode::SQLCompoundIdentifier (which is also backed by a Vec, but, as a variant of ASTNode enum, is not convenient to use when you know you need that specific variant). 
--- src/sqlast/mod.rs | 30 ++++++++++++++++++++---------- src/sqlast/sqltype.rs | 4 +++- src/sqlast/table_key.rs | 6 +++--- src/sqlparser.rs | 4 ++-- tests/sqlparser_generic.rs | 2 +- tests/sqlparser_postgres.rs | 23 +++++++++++++---------- 6 files changed, 42 insertions(+), 27 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index f6a45a845..aeb303557 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -153,7 +153,7 @@ pub enum SQLStatement { /// INSERT SQLInsert { /// TABLE - table_name: String, + table_name: SQLObjectName, /// COLUMNS columns: Vec, /// VALUES (vector of rows to insert) @@ -161,7 +161,7 @@ pub enum SQLStatement { }, SQLCopy { /// TABLE - table_name: String, + table_name: SQLObjectName, /// COLUMNS columns: Vec, /// VALUES a vector of values to be copied @@ -170,7 +170,7 @@ pub enum SQLStatement { /// UPDATE SQLUpdate { /// TABLE - table_name: String, + table_name: SQLObjectName, /// Column assignments assignments: Vec, /// WHERE @@ -186,14 +186,14 @@ pub enum SQLStatement { /// CREATE TABLE SQLCreateTable { /// Table name - name: String, + name: SQLObjectName, /// Optional schema columns: Vec, }, /// ALTER TABLE SQLAlterTable { /// Table name - name: String, + name: SQLObjectName, operation: AlterOperation, }, } @@ -207,7 +207,7 @@ impl ToString for SQLStatement { columns, values, } => { - let mut s = format!("INSERT INTO {}", table_name); + let mut s = format!("INSERT INTO {}", table_name.to_string()); if columns.len() > 0 { s += &format!(" ({})", columns.join(", ")); } @@ -232,7 +232,7 @@ impl ToString for SQLStatement { columns, values, } => { - let mut s = format!("COPY {}", table_name); + let mut s = format!("COPY {}", table_name.to_string()); if columns.len() > 0 { s += &format!( " ({})", @@ -262,7 +262,7 @@ impl ToString for SQLStatement { assignments, selection, } => { - let mut s = format!("UPDATE {}", table_name); + let mut s = format!("UPDATE {}", table_name.to_string()); if assignments.len() > 0 { s += &format!( 
"{}", @@ -293,7 +293,7 @@ impl ToString for SQLStatement { } SQLStatement::SQLCreateTable { name, columns } => format!( "CREATE TABLE {} ({})", - name, + name.to_string(), columns .iter() .map(|c| c.to_string()) @@ -301,12 +301,22 @@ impl ToString for SQLStatement { .join(", ") ), SQLStatement::SQLAlterTable { name, operation } => { - format!("ALTER TABLE {} {}", name, operation.to_string()) + format!("ALTER TABLE {} {}", name.to_string(), operation.to_string()) } } } } +/// A name of a table, view, custom type, etc., possibly multi-part, i.e. db.schema.obj +#[derive(Debug, Clone, PartialEq)] +pub struct SQLObjectName(pub Vec); + +impl ToString for SQLObjectName { + fn to_string(&self) -> String { + self.0.join(".") + } +} + /// SQL assignment `foo = expr` as used in SQLUpdate #[derive(Debug, Clone, PartialEq)] pub struct SQLAssignment { diff --git a/src/sqlast/sqltype.rs b/src/sqlast/sqltype.rs index c81313ab7..eeb7ffe5d 100644 --- a/src/sqlast/sqltype.rs +++ b/src/sqlast/sqltype.rs @@ -1,3 +1,5 @@ +use super::SQLObjectName; + /// SQL datatypes for literals in SQL statements #[derive(Debug, Clone, PartialEq)] pub enum SQLType { @@ -44,7 +46,7 @@ pub enum SQLType { /// Bytea Bytea, /// Custom type such as enums - Custom(String), + Custom(SQLObjectName), /// Arrays Array(Box), } diff --git a/src/sqlast/table_key.rs b/src/sqlast/table_key.rs index f4ff70f4b..615145c8f 100644 --- a/src/sqlast/table_key.rs +++ b/src/sqlast/table_key.rs @@ -1,4 +1,4 @@ -use super::SQLIdent; +use super::{SQLIdent, SQLObjectName}; #[derive(Debug, PartialEq, Clone)] pub enum AlterOperation { @@ -30,7 +30,7 @@ pub enum TableKey { Key(Key), ForeignKey { key: Key, - foreign_table: String, + foreign_table: SQLObjectName, referred_columns: Vec, }, } @@ -53,7 +53,7 @@ impl ToString for TableKey { "{} FOREIGN KEY ({}) REFERENCES {}({})", key.name, key.columns.join(", "), - foreign_table, + foreign_table.to_string(), referred_columns.join(", ") ), } diff --git a/src/sqlparser.rs b/src/sqlparser.rs 
index a873dc226..64cdca253 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -1060,11 +1060,11 @@ impl Parser { } } - pub fn parse_tablename(&mut self) -> Result { + pub fn parse_tablename(&mut self) -> Result { let identifier = self.parse_compound_identifier(&Token::Period)?; match identifier { // TODO: should store the compound identifier itself - ASTNode::SQLCompoundIdentifier(idents) => Ok(idents.join(".")), + ASTNode::SQLCompoundIdentifier(idents) => Ok(SQLObjectName(idents)), other => parser_err!(format!("Expecting compound identifier, found: {:?}", other)), } } diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 70d813e4a..657223f12 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -364,7 +364,7 @@ fn parse_create_table() { ); match ast { SQLStatement::SQLCreateTable { name, columns } => { - assert_eq!("uk_cities", name); + assert_eq!("uk_cities", name.to_string()); assert_eq!(3, columns.len()); let c_name = &columns[0]; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index ff3b2c30e..58ed2445e 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -31,7 +31,7 @@ fn parse_simple_insert() { values, .. } => { - assert_eq!(table_name, "customer"); + assert_eq!(table_name.to_string(), "customer"); assert!(columns.is_empty()); assert_eq!( vec![vec![ @@ -56,7 +56,7 @@ fn parse_common_insert() { values, .. } => { - assert_eq!(table_name, "public.customer"); + assert_eq!(table_name.to_string(), "public.customer"); assert!(columns.is_empty()); assert_eq!( vec![vec![ @@ -81,7 +81,7 @@ fn parse_complex_insert() { values, .. } => { - assert_eq!(table_name, "db.public.customer"); + assert_eq!(table_name.to_string(), "db.public.customer"); assert!(columns.is_empty()); assert_eq!( vec![vec![ @@ -120,7 +120,7 @@ fn parse_insert_with_columns() { values, .. 
} => { - assert_eq!(table_name, "public.customer"); + assert_eq!(table_name.to_string(), "public.customer"); assert_eq!( columns, vec!["id".to_string(), "name".to_string(), "active".to_string()] @@ -164,7 +164,7 @@ fn parse_create_table_with_defaults() { ); match one_statement_parses_to(&sql, "") { SQLStatement::SQLCreateTable { name, columns } => { - assert_eq!("public.customer", name); + assert_eq!("public.customer", name.to_string()); assert_eq!(10, columns.len()); let c_name = &columns[0]; @@ -205,7 +205,7 @@ fn parse_create_table_from_pg_dump() { )"); match one_statement_parses_to(&sql, "") { SQLStatement::SQLCreateTable { name, columns } => { - assert_eq!("public.customer", name); + assert_eq!("public.customer", name.to_string()); let c_customer_id = &columns[0]; assert_eq!("customer_id", c_customer_id.name); @@ -238,7 +238,10 @@ fn parse_create_table_from_pg_dump() { let c_release_year = &columns[10]; assert_eq!( - SQLType::Custom("public.year".to_string()), + SQLType::Custom(SQLObjectName(vec![ + "public".to_string(), + "year".to_string() + ])), c_release_year.data_type ); } @@ -259,7 +262,7 @@ fn parse_create_table_with_inherit() { ); match verified_stmt(&sql) { SQLStatement::SQLCreateTable { name, columns } => { - assert_eq!("bazaar.settings", name); + assert_eq!("bazaar.settings", name.to_string()); let c_name = &columns[0]; assert_eq!("settings_id", c_name.name); @@ -288,7 +291,7 @@ fn parse_alter_table_constraint_primary_key() { ); match verified_stmt(&sql) { SQLStatement::SQLAlterTable { name, .. } => { - assert_eq!(name, "bazaar.address"); + assert_eq!(name.to_string(), "bazaar.address"); } _ => assert!(false), } @@ -301,7 +304,7 @@ fn parse_alter_table_constraint_foreign_key() { ADD CONSTRAINT customer_address_id_fkey FOREIGN KEY (address_id) REFERENCES public.address(address_id)"); match verified_stmt(&sql) { SQLStatement::SQLAlterTable { name, .. 
} => { - assert_eq!(name, "public.customer"); + assert_eq!(name.to_string(), "public.customer"); } _ => assert!(false), } From 39e98cb11a0b960aff5233bb0474ebb9409c42b2 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Wed, 30 Jan 2019 21:22:53 +0300 Subject: [PATCH 12/45] Rename parse_tablename -> parse_object_name (4.2/4.4) ...to match the name of the recently introduced `SQLObjectName` struct and to avoid any reservations about using it with multi-part names of objects other than tables (as in the `type_name` case). --- src/sqlparser.rs | 16 +++++++++------- tests/sqlparser_postgres.rs | 4 ++-- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 64cdca253..de4c022ad 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -545,7 +545,7 @@ impl Parser { /// Parse a SQL CREATE statement pub fn parse_create(&mut self) -> Result { if self.parse_keywords(vec!["TABLE"]) { - let table_name = self.parse_tablename()?; + let table_name = self.parse_object_name()?; // parse optional column list (schema) let mut columns = vec![]; if self.consume_token(&Token::LParen) { @@ -639,7 +639,7 @@ impl Parser { Ok(TableKey::UniqueKey(key)) } else if is_foreign_key { self.expect_keyword("REFERENCES")?; - let foreign_table = self.parse_tablename()?; + let foreign_table = self.parse_object_name()?; self.expect_token(&Token::LParen)?; let referred_columns = self.parse_column_names()?; self.expect_token(&Token::RParen)?; @@ -659,7 +659,7 @@ impl Parser { pub fn parse_alter(&mut self) -> Result { self.expect_keyword("TABLE")?; let _ = self.parse_keyword("ONLY"); - let table_name = self.parse_tablename()?; + let table_name = self.parse_object_name()?; let operation: Result = if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) { match self.next_token() { @@ -688,7 +688,7 @@ impl Parser { /// Parse a copy statement pub fn parse_copy(&mut self) -> Result { - let table_name = self.parse_tablename()?; + let table_name = 
self.parse_object_name()?; let columns = if self.consume_token(&Token::LParen) { let column_names = self.parse_column_names()?; self.expect_token(&Token::RParen)?; @@ -986,7 +986,7 @@ impl Parser { } _ => { self.prev_token(); - let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type + let type_name = self.parse_object_name()?; Ok(SQLType::Custom(type_name)) } }, @@ -1060,7 +1060,9 @@ impl Parser { } } - pub fn parse_tablename(&mut self) -> Result { + /// Parse a possibly qualified, possibly quoted identifier, e.g. + /// `foo` or `myschema."table"` + pub fn parse_object_name(&mut self) -> Result { let identifier = self.parse_compound_identifier(&Token::Period)?; match identifier { // TODO: should store the compound identifier itself @@ -1323,7 +1325,7 @@ impl Parser { /// Parse an INSERT statement pub fn parse_insert(&mut self) -> Result { self.expect_keyword("INTO")?; - let table_name = self.parse_tablename()?; + let table_name = self.parse_object_name()?; let columns = if self.consume_token(&Token::LParen) { let column_names = self.parse_column_names()?; self.expect_token(&Token::RParen)?; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 58ed2445e..672c4f037 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -99,14 +99,14 @@ fn parse_complex_insert() { #[test] fn parse_invalid_table_name() { let mut parser = parser("db.public..customer"); - let ast = parser.parse_tablename(); + let ast = parser.parse_object_name(); assert!(ast.is_err()); } #[test] fn parse_no_table_name() { let mut parser = parser(""); - let ast = parser.parse_tablename(); + let ast = parser.parse_object_name(); assert!(ast.is_err()); } From f5bd9c398fab9a1055e8e7553615a6d59cfdd383 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Wed, 30 Jan 2019 21:30:25 +0300 Subject: [PATCH 13/45] Simplify by avoiding SQLCompoundIdentifier (4.3/4.4) ...instead make 
`parse_compound_identifier()` return the underlying Vec<> directly, and rename it to `parse_list_of_ids()`, since it's used both for parsing compound identifiers and lists of identifiers. --- src/sqlparser.rs | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index de4c022ad..ffd253960 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -1028,7 +1028,7 @@ impl Parser { } /// Parse one or more identifiers with the specified separator between them - pub fn parse_compound_identifier(&mut self, separator: &Token) -> Result { + pub fn parse_list_of_ids(&mut self, separator: &Token) -> Result, ParserError> { let mut idents = vec![]; let mut expect_identifier = true; loop { @@ -1056,27 +1056,19 @@ impl Parser { self.peek_token() )) } else { - Ok(ASTNode::SQLCompoundIdentifier(idents)) + Ok(idents) } } /// Parse a possibly qualified, possibly quoted identifier, e.g. /// `foo` or `myschema."table"` pub fn parse_object_name(&mut self) -> Result { - let identifier = self.parse_compound_identifier(&Token::Period)?; - match identifier { - // TODO: should store the compound identifier itself - ASTNode::SQLCompoundIdentifier(idents) => Ok(SQLObjectName(idents)), - other => parser_err!(format!("Expecting compound identifier, found: {:?}", other)), - } + Ok(SQLObjectName(self.parse_list_of_ids(&Token::Period)?)) } + /// Parse a comma-separated list of unqualified, possibly quoted identifiers pub fn parse_column_names(&mut self) -> Result, ParserError> { - let identifier = self.parse_compound_identifier(&Token::Comma)?; - match identifier { - ASTNode::SQLCompoundIdentifier(idents) => Ok(idents), - other => parser_err!(format!("Expecting compound identifier, found: {:?}", other)), - } + Ok(self.parse_list_of_ids(&Token::Comma)?) 
} pub fn parse_precision(&mut self) -> Result { @@ -1194,7 +1186,7 @@ impl Parser { self.expect_token(&Token::RParen)?; ASTNode::SQLSubquery(subquery) } else { - self.parse_compound_identifier(&Token::Period)? + ASTNode::SQLCompoundIdentifier(self.parse_object_name()?.0) }; let alias = self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; Ok(ASTNode::TableFactor { From 07790fe4c4ecb2c4429e970a7d0231a41b5181d7 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 31 Jan 2019 00:10:15 +0300 Subject: [PATCH 14/45] Improve DELETE FROM parsing (4.4/4.4) Store (and parse) `table_name: SQLObjectName` instead of `relation: Option<Box<ASTNode>>`, which can be an arbitrary expression. Also remove the `Option<>`: the table name is not optional in any dialects I'm familiar with. While the FROM keyword itself _is_ optional in some dialects, there are more things to implement for those dialects, see https://stackoverflow.com/a/4484271/1026 --- src/sqlast/mod.rs | 9 +++------ src/sqlparser.rs | 10 +++------- tests/sqlparser_generic.rs | 28 ++++++++-------------------- 3 files changed, 14 insertions(+), 33 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index aeb303557..d1a572065 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -179,7 +179,7 @@ pub enum SQLStatement { /// DELETE SQLDelete { /// FROM - relation: Option>, + table_name: SQLObjectName, /// WHERE selection: Option>, }, @@ -279,13 +279,10 @@ impl ToString for SQLStatement { s } SQLStatement::SQLDelete { - relation, + table_name, selection, } => { - let mut s = String::from("DELETE"); - if let Some(relation) = relation { - s += &format!(" FROM {}", relation.as_ref().to_string()); - } + let mut s = format!("DELETE FROM {}", table_name.to_string()); if let Some(selection) = selection { s += &format!(" WHERE {}", selection.as_ref().to_string()); } diff --git a/src/sqlparser.rs b/src/sqlparser.rs index ffd253960..8b313327c 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -1105,12 +1105,8
@@ impl Parser { } pub fn parse_delete(&mut self) -> Result { - let relation: Option> = if self.parse_keyword("FROM") { - Some(Box::new(self.parse_subexpr(0)?)) /* TBD (4) */ - } else { - None - }; - + self.expect_keyword("FROM")?; + let table_name = self.parse_object_name()?; let selection = if self.parse_keyword("WHERE") { Some(Box::new(self.parse_expr()?)) } else { @@ -1118,7 +1114,7 @@ impl Parser { }; Ok(SQLStatement::SQLDelete { - relation, + table_name, selection, }) } diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 657223f12..de538fdad 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -8,16 +8,10 @@ use sqlparser::sqltokenizer::*; #[test] fn parse_delete_statement() { - let sql: &str = "DELETE FROM 'table'"; - - match verified_stmt(&sql) { - SQLStatement::SQLDelete { relation, .. } => { - assert_eq!( - Some(Box::new(ASTNode::SQLValue(Value::SingleQuotedString( - "table".to_string() - )))), - relation - ); + let sql = "DELETE FROM \"table\""; + match verified_stmt(sql) { + SQLStatement::SQLDelete { table_name, .. } => { + assert_eq!(SQLObjectName(vec!["\"table\"".to_string()]), table_name); } _ => assert!(false), @@ -26,23 +20,17 @@ fn parse_delete_statement() { #[test] fn parse_where_delete_statement() { - let sql: &str = "DELETE FROM 'table' WHERE name = 5"; - use self::ASTNode::*; use self::SQLOperator::*; - match verified_stmt(&sql) { + let sql = "DELETE FROM foo WHERE name = 5"; + match verified_stmt(sql) { SQLStatement::SQLDelete { - relation, + table_name, selection, .. 
} => { - assert_eq!( - Some(Box::new(ASTNode::SQLValue(Value::SingleQuotedString( - "table".to_string() - )))), - relation - ); + assert_eq!(SQLObjectName(vec!["foo".to_string()]), table_name); assert_eq!( SQLBinaryExpr { From e0ceacd1adeb67c802e81feb5f6770cc2e254ae6 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Wed, 30 Jan 2019 22:52:37 +0300 Subject: [PATCH 15/45] Store original, quoted form in SQLIdent Also move more things to use SQLIdent instead of String in the hope of making it a newtype eventually. Add tests that quoted identifiers round-trip parsing/serialization correctly. --- src/sqlast/mod.rs | 7 ++++--- src/sqlast/query.rs | 2 +- src/sqlast/table_key.rs | 2 +- src/sqlparser.rs | 38 ++++++++++++++++++++------------------ tests/sqlparser_generic.rs | 28 ++++++++++++++++++++++++++++ 5 files changed, 54 insertions(+), 23 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index d1a572065..ee60e8f69 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -27,7 +27,7 @@ pub use self::value::Value; pub use self::sql_operator::SQLOperator; -// This could be enhanced to remember the way the identifier was quoted +/// Identifier name, in the originally quoted form (e.g. `"id"`) pub type SQLIdent = String; /// SQL Abstract Syntax Tree (AST) @@ -64,7 +64,8 @@ pub enum ASTNode { /// SQLValue SQLValue(Value), /// Scalar function call e.g. `LEFT(foo, 5)` - SQLFunction { id: String, args: Vec }, + /// TODO: this can be a compound SQLObjectName as well (for UDFs) + SQLFunction { id: SQLIdent, args: Vec }, /// CASE [] WHEN THEN ... 
[ELSE ] END SQLCase { // TODO: support optional operand for "simple case" @@ -317,7 +318,7 @@ impl ToString for SQLObjectName { /// SQL assignment `foo = expr` as used in SQLUpdate #[derive(Debug, Clone, PartialEq)] pub struct SQLAssignment { - id: String, + id: SQLIdent, value: Box, } diff --git a/src/sqlast/query.rs b/src/sqlast/query.rs index 81cc66c00..d947f0e46 100644 --- a/src/sqlast/query.rs +++ b/src/sqlast/query.rs @@ -134,7 +134,7 @@ pub enum JoinOperator { #[derive(Debug, Clone, PartialEq)] pub enum JoinConstraint { On(ASTNode), - Using(Vec), + Using(Vec), Natural, } diff --git a/src/sqlast/table_key.rs b/src/sqlast/table_key.rs index 615145c8f..6b1078e59 100644 --- a/src/sqlast/table_key.rs +++ b/src/sqlast/table_key.rs @@ -3,7 +3,7 @@ use super::{SQLIdent, SQLObjectName}; #[derive(Debug, PartialEq, Clone)] pub enum AlterOperation { AddConstraint(TableKey), - RemoveConstraint { name: String }, + RemoveConstraint { name: SQLIdent }, } impl ToString for AlterOperation { diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 8b313327c..0e7368645 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -172,12 +172,12 @@ impl Parser { }) } _ => match self.peek_token() { - Some(Token::LParen) => self.parse_function(&w.value), + Some(Token::LParen) => self.parse_function(w.as_sql_ident()), Some(Token::Period) => { - let mut id_parts: Vec = vec![w.value]; + let mut id_parts: Vec = vec![w.as_sql_ident()]; while self.consume_token(&Token::Period) { match self.next_token() { - Some(Token::SQLWord(w)) => id_parts.push(w.value), + Some(Token::SQLWord(w)) => id_parts.push(w.as_sql_ident()), _ => { return parser_err!(format!( "Error parsing compound identifier" @@ -187,7 +187,7 @@ impl Parser { } Ok(ASTNode::SQLCompoundIdentifier(id_parts)) } - _ => Ok(ASTNode::SQLIdentifier(w.value)), + _ => Ok(ASTNode::SQLIdentifier(w.as_sql_ident())), }, }, Token::Mult => Ok(ASTNode::SQLWildcard), @@ -213,20 +213,17 @@ impl Parser { } } - pub fn parse_function(&mut self, id: &str) 
-> Result { + pub fn parse_function(&mut self, id: SQLIdent) -> Result { self.expect_token(&Token::LParen)?; if self.consume_token(&Token::RParen) { Ok(ASTNode::SQLFunction { - id: id.to_string(), + id: id, args: vec![], }) } else { let args = self.parse_expr_list()?; self.expect_token(&Token::RParen)?; - Ok(ASTNode::SQLFunction { - id: id.to_string(), - args, - }) + Ok(ASTNode::SQLFunction { id, args }) } } @@ -573,7 +570,7 @@ impl Parser { Some(Token::Comma) => { self.next_token(); columns.push(SQLColumnDef { - name: column_name.value, + name: column_name.as_sql_ident(), data_type: data_type, allow_null, is_primary, @@ -584,7 +581,7 @@ impl Parser { Some(Token::RParen) => { self.next_token(); columns.push(SQLColumnDef { - name: column_name.value, + name: column_name.as_sql_ident(), data_type: data_type, allow_null, is_primary, @@ -622,7 +619,7 @@ impl Parser { } } - pub fn parse_table_key(&mut self, constraint_name: &str) -> Result { + pub fn parse_table_key(&mut self, constraint_name: SQLIdent) -> Result { let is_primary_key = self.parse_keywords(vec!["PRIMARY", "KEY"]); let is_unique_key = self.parse_keywords(vec!["UNIQUE", "KEY"]); let is_foreign_key = self.parse_keywords(vec!["FOREIGN", "KEY"]); @@ -630,7 +627,7 @@ impl Parser { let column_names = self.parse_column_names()?; self.expect_token(&Token::RParen)?; let key = Key { - name: constraint_name.to_string(), + name: constraint_name, columns: column_names, }; if is_primary_key { @@ -664,7 +661,7 @@ impl Parser { if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) { match self.next_token() { Some(Token::SQLWord(ref id)) => { - let table_key = self.parse_table_key(&id.value)?; + let table_key = self.parse_table_key(id.as_sql_ident())?; Ok(AlterOperation::AddConstraint(table_key)) } _ => { @@ -1012,8 +1009,7 @@ impl Parser { Some(Token::SQLWord(ref w)) if after_as || !reserved_kwds.contains(&w.keyword.as_str()) => { - // have to clone here until #![feature(bind_by_move_pattern_guards)] is enabled by default - 
Ok(Some(w.value.clone())) + Ok(Some(w.as_sql_ident())) } ref not_an_ident if after_as => parser_err!(format!( "Expected an identifier after AS, got {:?}", @@ -1036,7 +1032,7 @@ impl Parser { match token { Some(Token::SQLWord(s)) if expect_identifier => { expect_identifier = false; - idents.push(s.to_string()); + idents.push(s.as_sql_ident()); } Some(token) if token == separator && !expect_identifier => { expect_identifier = true; @@ -1386,3 +1382,9 @@ impl Parser { } } } + +impl SQLWord { + pub fn as_sql_ident(&self) -> SQLIdent { + self.to_string() + } +} diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index de538fdad..aa9adbdec 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -427,6 +427,34 @@ fn parse_select_version() { ); } +#[test] +fn parse_delimited_identifiers() { + // check that quoted identifiers in any position remain quoted after serialization + let sql = r#"SELECT "alias"."bar baz", "myfun"(), "simple id" FROM "a table" AS "alias""#; + let select = verified_only_select(sql); + // check SELECT + assert_eq!(3, select.projection.len()); + assert_eq!( + &ASTNode::SQLCompoundIdentifier(vec![r#""alias""#.to_string(), r#""bar baz""#.to_string()]), + expr_from_projection(&select.projection[0]), + ); + assert_eq!( + &ASTNode::SQLFunction { + id: r#""myfun""#.to_string(), + args: vec![] + }, + expr_from_projection(&select.projection[1]), + ); + assert_eq!( + &ASTNode::SQLIdentifier(r#""simple id""#.to_string()), + expr_from_projection(&select.projection[2]), + ); + + verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); + verified_stmt(r#"ALTER TABLE foo ADD CONSTRAINT "bar" PRIMARY KEY (baz)"#); + //TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#); +} + #[test] fn parse_parens() { use self::ASTNode::*; From 9967031cbaed216460b8533d951321184c79697f Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Wed, 6 Feb 2019 06:02:54 +0300 Subject: [PATCH 16/45] Move TableFactor to be a separate enum ASTNode can now be 
renamed SQLExpression, as it represents a node in the "expression" part of the AST -- other nodes have their own types. --- src/sqlast/mod.rs | 17 ++------- src/sqlast/query.rs | 35 ++++++++++++++++-- src/sqlparser.rs | 25 ++++++------- tests/sqlparser_generic.rs | 72 ++++++++++++++++++-------------------- 4 files changed, 82 insertions(+), 67 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index ee60e8f69..b54f4ac87 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -20,7 +20,7 @@ mod sqltype; mod table_key; mod value; -pub use self::query::{Join, JoinConstraint, JoinOperator, SQLOrderByExpr, SQLSelect}; +pub use self::query::{Join, JoinConstraint, JoinOperator, SQLOrderByExpr, SQLSelect, TableFactor}; pub use self::sqltype::SQLType; pub use self::table_key::{AlterOperation, Key, TableKey}; pub use self::value::Value; @@ -30,7 +30,8 @@ pub use self::sql_operator::SQLOperator; /// Identifier name, in the originally quoted form (e.g. `"id"`) pub type SQLIdent = String; -/// SQL Abstract Syntax Tree (AST) +/// Represents a parsed SQL expression, which is a common building +/// block of SQL statements (the part after SELECT, WHERE, etc.) #[derive(Debug, Clone, PartialEq)] pub enum ASTNode { /// Identifier e.g. 
table name or column name @@ -73,11 +74,6 @@ pub enum ASTNode { results: Vec, else_result: Option>, }, - /// A table name or a parenthesized subquery with an optional alias - TableFactor { - relation: Box, // SQLNested or SQLCompoundIdentifier - alias: Option, - }, /// A parenthesized subquery `(SELECT ...)`, used in expression like /// `SELECT (subquery) AS x` or `WHERE (subquery) = x` SQLSubquery(SQLSelect), @@ -134,13 +130,6 @@ impl ToString for ASTNode { } s + " END" } - ASTNode::TableFactor { relation, alias } => { - if let Some(alias) = alias { - format!("{} AS {}", relation.to_string(), alias) - } else { - relation.to_string() - } - } ASTNode::SQLSubquery(s) => format!("({})", s.to_string()), } } diff --git a/src/sqlast/query.rs b/src/sqlast/query.rs index d947f0e46..93a12a928 100644 --- a/src/sqlast/query.rs +++ b/src/sqlast/query.rs @@ -5,7 +5,7 @@ pub struct SQLSelect { /// projection expressions pub projection: Vec, /// FROM - pub relation: Option>, // TableFactor + pub relation: Option, // JOIN pub joins: Vec, /// WHERE @@ -31,7 +31,7 @@ impl ToString for SQLSelect { .join(", ") ); if let Some(ref relation) = self.relation { - s += &format!(" FROM {}", relation.as_ref().to_string()); + s += &format!(" FROM {}", relation.to_string()); } for join in &self.joins { s += &join.to_string(); @@ -69,9 +69,38 @@ impl ToString for SQLSelect { } } +/// A table name or a parenthesized subquery with an optional alias +#[derive(Debug, Clone, PartialEq)] +pub enum TableFactor { + Table { + name: SQLObjectName, + alias: Option, + }, + Derived { + subquery: Box, + alias: Option, + }, +} + +impl ToString for TableFactor { + fn to_string(&self) -> String { + let (base, alias) = match self { + TableFactor::Table { name, alias } => (name.to_string(), alias), + TableFactor::Derived { subquery, alias } => { + (format!("({})", subquery.to_string()), alias) + } + }; + if let Some(alias) = alias { + format!("{} AS {}", base, alias) + } else { + base + } + } +} + #[derive(Debug, 
Clone, PartialEq)] pub struct Join { - pub relation: ASTNode, // TableFactor + pub relation: TableFactor, pub join_operator: JoinOperator, } diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 0e7368645..91928cfba 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -1119,8 +1119,8 @@ impl Parser { pub fn parse_select(&mut self) -> Result { let projection = self.parse_expr_list()?; - let (relation, joins): (Option>, Vec) = if self.parse_keyword("FROM") { - let relation = Some(Box::new(self.parse_table_factor()?)); + let (relation, joins) = if self.parse_keyword("FROM") { + let relation = Some(self.parse_table_factor()?); let joins = self.parse_joins()?; (relation, joins) } else { @@ -1171,20 +1171,21 @@ impl Parser { } /// A table name or a parenthesized subquery, followed by optional `[AS] alias` - pub fn parse_table_factor(&mut self) -> Result { - let relation = if self.consume_token(&Token::LParen) { + pub fn parse_table_factor(&mut self) -> Result { + if self.consume_token(&Token::LParen) { self.expect_keyword("SELECT")?; let subquery = self.parse_select()?; self.expect_token(&Token::RParen)?; - ASTNode::SQLSubquery(subquery) + Ok(TableFactor::Derived { + subquery: Box::new(subquery), + alias: self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?, + }) } else { - ASTNode::SQLCompoundIdentifier(self.parse_object_name()?.0) - }; - let alias = self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(ASTNode::TableFactor { - relation: Box::new(relation), - alias, - }) + Ok(TableFactor::Table { + name: self.parse_object_name()?, + alias: self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?, + }) + } } fn parse_join_constraint(&mut self, natural: bool) -> Result { diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index aa9adbdec..1d331af36 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -432,6 +432,14 @@ fn parse_delimited_identifiers() { // check that quoted identifiers in any 
position remain quoted after serialization let sql = r#"SELECT "alias"."bar baz", "myfun"(), "simple id" FROM "a table" AS "alias""#; let select = verified_only_select(sql); + // check FROM + match select.relation.unwrap() { + TableFactor::Table { name, alias } => { + assert_eq!(vec![r#""a table""#.to_string()], name.0); + assert_eq!(r#""alias""#, alias.unwrap()); + } + _ => panic!("Expecting TableFactor::Table"), + } // check SELECT assert_eq!(3, select.projection.len()); assert_eq!( @@ -515,45 +523,33 @@ fn parse_case_expression() { #[test] fn parse_implicit_join() { let sql = "SELECT * FROM t1, t2"; - - match verified_stmt(sql) { - SQLStatement::SQLSelect(SQLSelect { joins, .. }) => { - assert_eq!(joins.len(), 1); - assert_eq!( - joins[0], - Join { - relation: ASTNode::TableFactor { - relation: Box::new(ASTNode::SQLCompoundIdentifier(vec!["t2".to_string()])), - alias: None, - }, - join_operator: JoinOperator::Implicit - } - ) - } - _ => assert!(false), - } + let select = verified_only_select(sql); + assert_eq!( + &Join { + relation: TableFactor::Table { + name: SQLObjectName(vec!["t2".to_string()]), + alias: None, + }, + join_operator: JoinOperator::Implicit + }, + only(&select.joins), + ); } #[test] fn parse_cross_join() { let sql = "SELECT * FROM t1 CROSS JOIN t2"; - - match verified_stmt(sql) { - SQLStatement::SQLSelect(SQLSelect { joins, .. 
}) => { - assert_eq!(joins.len(), 1); - assert_eq!( - joins[0], - Join { - relation: ASTNode::TableFactor { - relation: Box::new(ASTNode::SQLCompoundIdentifier(vec!["t2".to_string()])), - alias: None, - }, - join_operator: JoinOperator::Cross - } - ) - } - _ => assert!(false), - } + let select = verified_only_select(sql); + assert_eq!( + &Join { + relation: TableFactor::Table { + name: SQLObjectName(vec!["t2".to_string()]), + alias: None, + }, + join_operator: JoinOperator::Cross + }, + only(&select.joins), + ); } #[test] @@ -564,8 +560,8 @@ fn parse_joins_on() { f: impl Fn(JoinConstraint) -> JoinOperator, ) -> Join { Join { - relation: ASTNode::TableFactor { - relation: Box::new(ASTNode::SQLCompoundIdentifier(vec![relation.into()])), + relation: TableFactor::Table { + name: SQLObjectName(vec![relation.into()]), alias, }, join_operator: f(JoinConstraint::On(ASTNode::SQLBinaryExpr { @@ -615,8 +611,8 @@ fn parse_joins_using() { f: impl Fn(JoinConstraint) -> JoinOperator, ) -> Join { Join { - relation: ASTNode::TableFactor { - relation: Box::new(ASTNode::SQLCompoundIdentifier(vec![relation.into()])), + relation: TableFactor::Table { + name: SQLObjectName(vec![relation.into()]), alias, }, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), From 3619e89e9c79f5fb70b967a7bb8d3d3f909deadf Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 31 Jan 2019 15:10:52 +0300 Subject: [PATCH 17/45] Remove Box<> from SQLOrderByExpr It was probably copied from somewhere else when most types were variants in ASTNode, and needed Box<> to prevent recursion in the ASTNode definition. 
--- src/sqlast/query.rs | 8 +------- src/sqlparser.rs | 2 +- tests/sqlparser_generic.rs | 10 +++++----- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/src/sqlast/query.rs b/src/sqlast/query.rs index 93a12a928..385173a1d 100644 --- a/src/sqlast/query.rs +++ b/src/sqlast/query.rs @@ -170,16 +170,10 @@ pub enum JoinConstraint { /// SQL ORDER BY expression #[derive(Debug, Clone, PartialEq)] pub struct SQLOrderByExpr { - pub expr: Box, + pub expr: ASTNode, pub asc: Option, } -impl SQLOrderByExpr { - pub fn new(expr: Box, asc: Option) -> Self { - SQLOrderByExpr { expr, asc } - } -} - impl ToString for SQLOrderByExpr { fn to_string(&self) -> String { match self.asc { diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 91928cfba..be57c1210 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -1362,7 +1362,7 @@ impl Parser { None }; - expr_list.push(SQLOrderByExpr::new(Box::new(expr), asc)); + expr_list.push(SQLOrderByExpr { expr, asc }); if let Some(Token::Comma) = self.peek_token() { self.next_token(); diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 1d331af36..480b31475 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -242,15 +242,15 @@ fn parse_select_order_by() { assert_eq!( Some(vec![ SQLOrderByExpr { - expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())), + expr: ASTNode::SQLIdentifier("lname".to_string()), asc: Some(true), }, SQLOrderByExpr { - expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())), + expr: ASTNode::SQLIdentifier("fname".to_string()), asc: Some(false), }, SQLOrderByExpr { - expr: Box::new(ASTNode::SQLIdentifier("id".to_string())), + expr: ASTNode::SQLIdentifier("id".to_string()), asc: None, }, ]), @@ -277,11 +277,11 @@ fn parse_select_order_by_limit() { assert_eq!( Some(vec![ SQLOrderByExpr { - expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())), + expr: ASTNode::SQLIdentifier("lname".to_string()), asc: Some(true), }, SQLOrderByExpr { - expr: 
Box::new(ASTNode::SQLIdentifier("fname".to_string())), + expr: ASTNode::SQLIdentifier("fname".to_string()), asc: Some(false), }, ]), From c5bbfc33fdcfe03031df873aca0f76a03b172d8f Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 31 Jan 2019 15:30:06 +0300 Subject: [PATCH 18/45] Don't Box<ASTNode> in SQLStatement This used to be needed when it was a variant in the ASTNode enum itself. --- src/sqlast/mod.rs | 16 ++++++++-------- src/sqlparser.rs | 4 ++-- tests/sqlparser_generic.rs | 2 +- tests/sqlparser_postgres.rs | 4 ++-- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index b54f4ac87..c5e85c09d 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -164,14 +164,14 @@ pub enum SQLStatement { /// Column assignments assignments: Vec, /// WHERE - selection: Option>, + selection: Option, }, /// DELETE SQLDelete { /// FROM table_name: SQLObjectName, /// WHERE - selection: Option>, + selection: Option, }, /// CREATE TABLE SQLCreateTable { @@ -264,7 +264,7 @@ impl ToString for SQLStatement { ); } if let Some(selection) = selection { - s += &format!(" WHERE {}", selection.as_ref().to_string()); + s += &format!(" WHERE {}", selection.to_string()); } s } @@ -274,7 +274,7 @@ impl ToString for SQLStatement { } => { let mut s = format!("DELETE FROM {}", table_name.to_string()); if let Some(selection) = selection { - s += &format!(" WHERE {}", selection.as_ref().to_string()); + s += &format!(" WHERE {}", selection.to_string()); } s } @@ -308,12 +308,12 @@ impl ToString for SQLObjectName { #[derive(Debug, Clone, PartialEq)] pub struct SQLAssignment { id: SQLIdent, - value: Box, + value: ASTNode, } impl ToString for SQLAssignment { fn to_string(&self) -> String { - format!("SET {} = {}", self.id, self.value.as_ref().to_string()) + format!("SET {} = {}", self.id, self.value.to_string()) } } @@ -324,7 +324,7 @@ pub struct SQLColumnDef { pub data_type: SQLType, pub is_primary: bool, pub is_unique: bool, - pub default:
Option>, + pub default: Option, pub allow_null: bool, } @@ -338,7 +338,7 @@ impl ToString for SQLColumnDef { s += " UNIQUE"; } if let Some(ref default) = self.default { - s += &format!(" DEFAULT {}", default.as_ref().to_string()); + s += &format!(" DEFAULT {}", default.to_string()); } if !self.allow_null { s += " NOT NULL"; diff --git a/src/sqlparser.rs b/src/sqlparser.rs index be57c1210..b72e8fbc8 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -553,7 +553,7 @@ impl Parser { let is_unique = self.parse_keyword("UNIQUE"); let default = if self.parse_keyword("DEFAULT") { let expr = self.parse_default_expr(0)?; - Some(Box::new(expr)) + Some(expr) } else { None }; @@ -1104,7 +1104,7 @@ impl Parser { self.expect_keyword("FROM")?; let table_name = self.parse_object_name()?; let selection = if self.parse_keyword("WHERE") { - Some(Box::new(self.parse_expr()?)) + Some(self.parse_expr()?) } else { None }; diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 480b31475..3921ea60d 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -38,7 +38,7 @@ fn parse_where_delete_statement() { op: Eq, right: Box::new(SQLValue(Value::Long(5))), }, - *selection.unwrap(), + selection.unwrap(), ); } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 672c4f037..80e57176c 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -224,7 +224,7 @@ fn parse_create_table_from_pg_dump() { let c_create_date1 = &columns[8]; assert_eq!( - Some(Box::new(ASTNode::SQLCast { + Some(ASTNode::SQLCast { expr: Box::new(ASTNode::SQLCast { expr: Box::new(ASTNode::SQLValue(Value::SingleQuotedString( "now".to_string() @@ -232,7 +232,7 @@ fn parse_create_table_from_pg_dump() { data_type: SQLType::Text }), data_type: SQLType::Date - })), + }), c_create_date1.default ); From e3b981a0e2c1c76779957886106d3706fefd92e5 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 31 Jan 2019 15:25:00 +0300 Subject: [PATCH 
19/45] Don't Box in SQLSelect Instead change ASTNode::SQLSubquery to be Box --- src/sqlast/mod.rs | 2 +- src/sqlast/query.rs | 14 +++++++------- src/sqlparser.rs | 10 +++++----- tests/sqlparser_generic.rs | 8 ++++---- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index c5e85c09d..1461726d8 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -76,7 +76,7 @@ pub enum ASTNode { }, /// A parenthesized subquery `(SELECT ...)`, used in expression like /// `SELECT (subquery) AS x` or `WHERE (subquery) = x` - SQLSubquery(SQLSelect), + SQLSubquery(Box), } impl ToString for ASTNode { diff --git a/src/sqlast/query.rs b/src/sqlast/query.rs index 385173a1d..887042f96 100644 --- a/src/sqlast/query.rs +++ b/src/sqlast/query.rs @@ -6,18 +6,18 @@ pub struct SQLSelect { pub projection: Vec, /// FROM pub relation: Option, - // JOIN + /// JOIN pub joins: Vec, /// WHERE - pub selection: Option>, + pub selection: Option, /// ORDER BY pub order_by: Option>, /// GROUP BY pub group_by: Option>, /// HAVING - pub having: Option>, + pub having: Option, /// LIMIT - pub limit: Option>, + pub limit: Option, } impl ToString for SQLSelect { @@ -37,7 +37,7 @@ impl ToString for SQLSelect { s += &join.to_string(); } if let Some(ref selection) = self.selection { - s += &format!(" WHERE {}", selection.as_ref().to_string()); + s += &format!(" WHERE {}", selection.to_string()); } if let Some(ref group_by) = self.group_by { s += &format!( @@ -50,7 +50,7 @@ impl ToString for SQLSelect { ); } if let Some(ref having) = self.having { - s += &format!(" HAVING {}", having.as_ref().to_string()); + s += &format!(" HAVING {}", having.to_string()); } if let Some(ref order_by) = self.order_by { s += &format!( @@ -63,7 +63,7 @@ impl ToString for SQLSelect { ); } if let Some(ref limit) = self.limit { - s += &format!(" LIMIT {}", limit.as_ref().to_string()); + s += &format!(" LIMIT {}", limit.to_string()); } s } diff --git a/src/sqlparser.rs 
b/src/sqlparser.rs index b72e8fbc8..f2d616cc3 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -197,7 +197,7 @@ impl Parser { } Token::LParen => { let expr = if self.parse_keyword("SELECT") { - ASTNode::SQLSubquery(self.parse_select()?) + ASTNode::SQLSubquery(Box::new(self.parse_select()?)) } else { ASTNode::SQLNested(Box::new(self.parse_expr()?)) }; @@ -1129,7 +1129,7 @@ impl Parser { let selection = if self.parse_keyword("WHERE") { let expr = self.parse_expr()?; - Some(Box::new(expr)) + Some(expr) } else { None }; @@ -1141,7 +1141,7 @@ impl Parser { }; let having = if self.parse_keyword("HAVING") { - Some(Box::new(self.parse_expr()?)) + Some(self.parse_expr()?) } else { None }; @@ -1374,12 +1374,12 @@ impl Parser { } /// Parse a LIMIT clause - pub fn parse_limit(&mut self) -> Result>, ParserError> { + pub fn parse_limit(&mut self) -> Result, ParserError> { if self.parse_keyword("ALL") { Ok(None) } else { self.parse_literal_int() - .map(|n| Some(Box::new(ASTNode::SQLValue(Value::Long(n))))) + .map(|n| Some(ASTNode::SQLValue(Value::Long(n)))) } } } diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 3921ea60d..0d613469c 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -54,7 +54,7 @@ fn parse_simple_select() { projection, limit, .. 
}) => { assert_eq!(3, projection.len()); - assert_eq!(Some(Box::new(ASTNode::SQLValue(Value::Long(5)))), limit); + assert_eq!(Some(ASTNode::SQLValue(Value::Long(5))), limit); } _ => assert!(false), } @@ -207,7 +207,7 @@ fn parse_like() { "%a".to_string() ))), }, - *selection.unwrap() + selection.unwrap() ); } _ => assert!(false), @@ -227,7 +227,7 @@ fn parse_not_like() { "%a".to_string() ))), }, - *selection.unwrap() + selection.unwrap() ); } _ => assert!(false), @@ -287,7 +287,7 @@ fn parse_select_order_by_limit() { ]), order_by ); - assert_eq!(Some(Box::new(ASTNode::SQLValue(Value::Long(2)))), limit); + assert_eq!(Some(ASTNode::SQLValue(Value::Long(2))), limit); } _ => assert!(false), } From 6b107065ac10dcd8cf92c38132f863e0da1545b5 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Wed, 6 Feb 2019 04:13:36 +0300 Subject: [PATCH 20/45] Switch some tests to `verified_select_stmt` (the tests affected by "unboxing" in the previous commits.) --- tests/sqlparser_generic.rs | 172 ++++++++++++++++--------------------- 1 file changed, 72 insertions(+), 100 deletions(-) diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 0d613469c..dae80dcf4 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -48,16 +48,11 @@ fn parse_where_delete_statement() { #[test] fn parse_simple_select() { - let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT 5"); - match verified_stmt(&sql) { - SQLStatement::SQLSelect(SQLSelect { - projection, limit, .. 
- }) => { - assert_eq!(3, projection.len()); - assert_eq!(Some(ASTNode::SQLValue(Value::Long(5))), limit); - } - _ => assert!(false), - } + let sql = "SELECT id, fname, lname FROM customer WHERE id = 1 LIMIT 5"; + let select = verified_only_select(sql); + assert_eq!(3, select.projection.len()); + let select = verified_query(sql); + assert_eq!(Some(ASTNode::SQLValue(Value::Long(5))), select.limit); } #[test] @@ -196,69 +191,57 @@ fn parse_not_precedence() { #[test] fn parse_like() { - let sql = String::from("SELECT * FROM customers WHERE name LIKE '%a'"); - match verified_stmt(&sql) { - SQLStatement::SQLSelect(SQLSelect { selection, .. }) => { - assert_eq!( - ASTNode::SQLBinaryExpr { - left: Box::new(ASTNode::SQLIdentifier("name".to_string())), - op: SQLOperator::Like, - right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString( - "%a".to_string() - ))), - }, - selection.unwrap() - ); - } - _ => assert!(false), - } + let sql = "SELECT * FROM customers WHERE name LIKE '%a'"; + let select = verified_only_select(sql); + assert_eq!( + ASTNode::SQLBinaryExpr { + left: Box::new(ASTNode::SQLIdentifier("name".to_string())), + op: SQLOperator::Like, + right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString( + "%a".to_string() + ))), + }, + select.selection.unwrap() + ); } #[test] fn parse_not_like() { - let sql = String::from("SELECT * FROM customers WHERE name NOT LIKE '%a'"); - match verified_stmt(&sql) { - SQLStatement::SQLSelect(SQLSelect { selection, .. 
}) => { - assert_eq!( - ASTNode::SQLBinaryExpr { - left: Box::new(ASTNode::SQLIdentifier("name".to_string())), - op: SQLOperator::NotLike, - right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString( - "%a".to_string() - ))), - }, - selection.unwrap() - ); - } - _ => assert!(false), - } + let sql = "SELECT * FROM customers WHERE name NOT LIKE '%a'"; + let select = verified_only_select(sql); + assert_eq!( + ASTNode::SQLBinaryExpr { + left: Box::new(ASTNode::SQLIdentifier("name".to_string())), + op: SQLOperator::NotLike, + right: Box::new(ASTNode::SQLValue(Value::SingleQuotedString( + "%a".to_string() + ))), + }, + select.selection.unwrap() + ); } #[test] fn parse_select_order_by() { fn chk(sql: &str) { - match verified_stmt(&sql) { - SQLStatement::SQLSelect(SQLSelect { order_by, .. }) => { - assert_eq!( - Some(vec![ - SQLOrderByExpr { - expr: ASTNode::SQLIdentifier("lname".to_string()), - asc: Some(true), - }, - SQLOrderByExpr { - expr: ASTNode::SQLIdentifier("fname".to_string()), - asc: Some(false), - }, - SQLOrderByExpr { - expr: ASTNode::SQLIdentifier("id".to_string()), - asc: None, - }, - ]), - order_by - ); - } - _ => assert!(false), - } + let select = verified_query(sql); + assert_eq!( + Some(vec![ + SQLOrderByExpr { + expr: ASTNode::SQLIdentifier("lname".to_string()), + asc: Some(true), + }, + SQLOrderByExpr { + expr: ASTNode::SQLIdentifier("fname".to_string()), + asc: Some(false), + }, + SQLOrderByExpr { + expr: ASTNode::SQLIdentifier("id".to_string()), + asc: None, + }, + ]), + select.order_by + ); } chk("SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC, id"); // make sure ORDER is not treated as an alias @@ -267,47 +250,36 @@ fn parse_select_order_by() { #[test] fn parse_select_order_by_limit() { - let sql = String::from( - "SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC LIMIT 2", + let sql = "SELECT id, fname, lname FROM customer WHERE id < 5 \ + ORDER BY lname ASC, fname DESC LIMIT 2"; 
+ let select = verified_query(sql); + assert_eq!( + Some(vec![ + SQLOrderByExpr { + expr: ASTNode::SQLIdentifier("lname".to_string()), + asc: Some(true), + }, + SQLOrderByExpr { + expr: ASTNode::SQLIdentifier("fname".to_string()), + asc: Some(false), + }, + ]), + select.order_by ); - match verified_stmt(&sql) { - SQLStatement::SQLSelect(SQLSelect { - order_by, limit, .. - }) => { - assert_eq!( - Some(vec![ - SQLOrderByExpr { - expr: ASTNode::SQLIdentifier("lname".to_string()), - asc: Some(true), - }, - SQLOrderByExpr { - expr: ASTNode::SQLIdentifier("fname".to_string()), - asc: Some(false), - }, - ]), - order_by - ); - assert_eq!(Some(ASTNode::SQLValue(Value::Long(2))), limit); - } - _ => assert!(false), - } + assert_eq!(Some(ASTNode::SQLValue(Value::Long(2))), select.limit); } #[test] fn parse_select_group_by() { - let sql = String::from("SELECT id, fname, lname FROM customer GROUP BY lname, fname"); - match verified_stmt(&sql) { - SQLStatement::SQLSelect(SQLSelect { group_by, .. }) => { - assert_eq!( - Some(vec![ - ASTNode::SQLIdentifier("lname".to_string()), - ASTNode::SQLIdentifier("fname".to_string()), - ]), - group_by - ); - } - _ => assert!(false), - } + let sql = "SELECT id, fname, lname FROM customer GROUP BY lname, fname"; + let select = verified_only_select(sql); + assert_eq!( + Some(vec![ + ASTNode::SQLIdentifier("lname".to_string()), + ASTNode::SQLIdentifier("fname".to_string()), + ]), + select.group_by + ); } #[test] From 05a70a358a20870554d9b71d82682d1c8e66f27b Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 3 Feb 2019 04:54:11 +0300 Subject: [PATCH 21/45] Assert when an unknown keyword was passed to parse_keyword() This happens all the time when I forget to check that the keyword I wanted to use is actually listed in keywords.rs, this should help with debugging. 
--- src/sqlparser.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index f2d616cc3..fead1b31e 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -473,6 +473,11 @@ impl Parser { /// Look for an expected keyword and consume it if it exists #[must_use] pub fn parse_keyword(&mut self, expected: &'static str) -> bool { + // Ideally, we'd accept a enum variant, not a string, but since + // it's not trivial to maintain the enum without duplicating all + // the keywords three times, we'll settle for a run-time check that + // the string actually represents a known keyword... + assert!(keywords::ALL_KEYWORDS.contains(&expected)); match self.peek_token() { Some(Token::SQLWord(ref k)) if expected.eq_ignore_ascii_case(&k.keyword) => { self.next_token(); From b716ffb937170c8dde546878ee96b2cf16f6d3ac Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Wed, 30 Jan 2019 22:52:10 +0300 Subject: [PATCH 22/45] Simplify JOIN USING (columns) ...by reusing `parse_column_names` instead of extracting identifiers out of the `parse_expr_list`s result. --- src/sqlparser.rs | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index fead1b31e..35b2bb5df 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -1201,17 +1201,7 @@ impl Parser { Ok(JoinConstraint::On(constraint)) } else if self.parse_keyword("USING") { self.expect_token(&Token::LParen)?; - let attributes = self - .parse_expr_list()? 
- .into_iter() - .map(|ast_node| match ast_node { - ASTNode::SQLIdentifier(ident) => Ok(ident), - unexpected => { - parser_err!(format!("Expected identifier, found {:?}", unexpected)) - } - }) - .collect::, ParserError>>()?; - + let attributes = self.parse_column_names()?; self.expect_token(&Token::RParen)?; Ok(JoinConstraint::Using(attributes)) } else { From 89602dc0448157941505eb04eb77419430d2ef9d Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 31 Jan 2019 02:23:17 +0300 Subject: [PATCH 23/45] Fix a typo in parse_value error message --- src/sqlparser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 35b2bb5df..76c8599f4 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -774,7 +774,7 @@ impl Parser { //TODO: parse the timestamp here (see parse_timestamp_value()) Token::Number(ref n) if n.contains(".") => match n.parse::() { Ok(n) => Ok(Value::Double(n)), - Err(e) => parser_err!(format!("Could not parse '{}' as i64: {}", n, e)), + Err(e) => parser_err!(format!("Could not parse '{}' as f64: {}", n, e)), }, Token::Number(ref n) => match n.parse::() { Ok(n) => Ok(Value::Long(n)), From a0f625b94945211c7ae24fe02bf56f7ca079a9e8 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 31 Jan 2019 03:41:39 +0300 Subject: [PATCH 24/45] Simplify parse_create() a little Don't need the duplicate `columns.push()` + we advance the tokenizer, so no need to peek first. 
--- src/sqlparser.rs | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 76c8599f4..423b713f0 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -571,28 +571,17 @@ impl Parser { }; debug!("default: {:?}", default); - match self.peek_token() { - Some(Token::Comma) => { - self.next_token(); - columns.push(SQLColumnDef { - name: column_name.as_sql_ident(), - data_type: data_type, - allow_null, - is_primary, - is_unique, - default, - }); - } + columns.push(SQLColumnDef { + name: column_name.as_sql_ident(), + data_type: data_type, + allow_null, + is_primary, + is_unique, + default, + }); + match self.next_token() { + Some(Token::Comma) => {} Some(Token::RParen) => { - self.next_token(); - columns.push(SQLColumnDef { - name: column_name.as_sql_ident(), - data_type: data_type, - allow_null, - is_primary, - is_unique, - default, - }); break; } other => { From 346d1ff2e454d4df7b52798515c8609424e006f5 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 31 Jan 2019 06:02:39 +0300 Subject: [PATCH 25/45] Improve error messages in parse_create() By not swallowing the Err from parse_data_type(). Also switch to `match` to enable parsing table-level constraints in this loop later. 
--- src/sqlparser.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 423b713f0..65578f486 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -552,8 +552,9 @@ impl Parser { let mut columns = vec![]; if self.consume_token(&Token::LParen) { loop { - if let Some(Token::SQLWord(column_name)) = self.next_token() { - if let Ok(data_type) = self.parse_data_type() { + match self.next_token() { + Some(Token::SQLWord(column_name)) => { + let data_type = self.parse_data_type()?; let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]); let is_unique = self.parse_keyword("UNIQUE"); let default = if self.parse_keyword("DEFAULT") { @@ -586,18 +587,17 @@ impl Parser { } other => { return parser_err!( - format!("Expected ',' or ')' after column definition but found {:?}", other) - ); + format!("Expected ',' or ')' after column definition but found {:?}", other) + ); } } - } else { + } + unexpected => { return parser_err!(format!( - "Error parsing data type in column definition near: {:?}", - self.peek_token() + "Expected column name, got {:?}", + unexpected )); } - } else { - return parser_err!("Error parsing column name"); } } } From 0c0cbcaff4bfaca1d3e3512e5c44659ae084e20f Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 3 Feb 2019 04:01:52 +0300 Subject: [PATCH 26/45] Support basic CREATE VIEW --- src/dialect/keywords.rs | 2 + src/sqlast/mod.rs | 9 +++ src/sqlparser.rs | 136 +++++++++++++++++++++---------------- tests/sqlparser_generic.rs | 12 ++++ 4 files changed, 100 insertions(+), 59 deletions(-) diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index 1a39fe448..e1495e56d 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -350,6 +350,7 @@ keyword!( VARCHAR, VARYING, VERSIONING, + VIEW, WHEN, WHENEVER, WHERE, @@ -697,6 +698,7 @@ pub const ALL_KEYWORDS: &'static [&'static str] = &[ VARCHAR, VARYING, VERSIONING, + VIEW, WHEN, WHENEVER, WHERE, diff 
--git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 1461726d8..b343b837f 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -173,6 +173,12 @@ pub enum SQLStatement { /// WHERE selection: Option, }, + /// CREATE VIEW + SQLCreateView { + /// View name + name: SQLObjectName, + query: SQLSelect, + }, /// CREATE TABLE SQLCreateTable { /// Table name @@ -278,6 +284,9 @@ impl ToString for SQLStatement { } s } + SQLStatement::SQLCreateView { name, query } => { + format!("CREATE VIEW {} AS {}", name.to_string(), query.to_string()) + } SQLStatement::SQLCreateTable { name, columns } => format!( "CREATE TABLE {} ({})", name.to_string(), diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 65578f486..b6325b10f 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -546,65 +546,10 @@ impl Parser { /// Parse a SQL CREATE statement pub fn parse_create(&mut self) -> Result { - if self.parse_keywords(vec!["TABLE"]) { - let table_name = self.parse_object_name()?; - // parse optional column list (schema) - let mut columns = vec![]; - if self.consume_token(&Token::LParen) { - loop { - match self.next_token() { - Some(Token::SQLWord(column_name)) => { - let data_type = self.parse_data_type()?; - let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]); - let is_unique = self.parse_keyword("UNIQUE"); - let default = if self.parse_keyword("DEFAULT") { - let expr = self.parse_default_expr(0)?; - Some(expr) - } else { - None - }; - let allow_null = if self.parse_keywords(vec!["NOT", "NULL"]) { - false - } else if self.parse_keyword("NULL") { - true - } else { - true - }; - debug!("default: {:?}", default); - - columns.push(SQLColumnDef { - name: column_name.as_sql_ident(), - data_type: data_type, - allow_null, - is_primary, - is_unique, - default, - }); - match self.next_token() { - Some(Token::Comma) => {} - Some(Token::RParen) => { - break; - } - other => { - return parser_err!( - format!("Expected ',' or ')' after column definition but found {:?}", other) - ); - } - } - } - 
unexpected => { - return parser_err!(format!( - "Expected column name, got {:?}", - unexpected - )); - } - } - } - } - Ok(SQLStatement::SQLCreateTable { - name: table_name, - columns, - }) + if self.parse_keyword("TABLE") { + self.parse_create_table() + } else if self.parse_keyword("VIEW") { + self.parse_create_view() } else { parser_err!(format!( "Unexpected token after CREATE: {:?}", @@ -613,6 +558,79 @@ impl Parser { } } + pub fn parse_create_view(&mut self) -> Result { + // Many dialects support `OR REPLACE` | `OR ALTER` right after `CREATE`, but we don't (yet). + // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. + let name = self.parse_object_name()?; + // Parenthesized "output" columns list could be handled here. + // Some dialects allow WITH here, followed by some keywords (e.g. MS SQL) + // or `(k1=v1, k2=v2, ...)` (Postgres) + self.expect_keyword("AS")?; + self.expect_keyword("SELECT")?; + let query = self.parse_select()?; + // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. 
+ Ok(SQLStatement::SQLCreateView { name, query }) + } + + pub fn parse_create_table(&mut self) -> Result { + let table_name = self.parse_object_name()?; + // parse optional column list (schema) + let mut columns = vec![]; + if self.consume_token(&Token::LParen) { + loop { + match self.next_token() { + Some(Token::SQLWord(column_name)) => { + let data_type = self.parse_data_type()?; + let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]); + let is_unique = self.parse_keyword("UNIQUE"); + let default = if self.parse_keyword("DEFAULT") { + let expr = self.parse_default_expr(0)?; + Some(expr) + } else { + None + }; + let allow_null = if self.parse_keywords(vec!["NOT", "NULL"]) { + false + } else if self.parse_keyword("NULL") { + true + } else { + true + }; + debug!("default: {:?}", default); + + columns.push(SQLColumnDef { + name: column_name.as_sql_ident(), + data_type: data_type, + allow_null, + is_primary, + is_unique, + default, + }); + match self.next_token() { + Some(Token::Comma) => {} + Some(Token::RParen) => { + break; + } + other => { + return parser_err!(format!( + "Expected ',' or ')' after column definition but found {:?}", + other + )); + } + } + } + unexpected => { + return parser_err!(format!("Expected column name, got {:?}", unexpected)); + } + } + } + } + Ok(SQLStatement::SQLCreateTable { + name: table_name, + columns, + }) + } + pub fn parse_table_key(&mut self, constraint_name: SQLIdent) -> Result { let is_primary_key = self.parse_keywords(vec!["PRIMARY", "KEY"]); let is_unique_key = self.parse_keywords(vec!["UNIQUE", "KEY"]); diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index dae80dcf4..bb2778aaf 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -700,6 +700,18 @@ fn parse_scalar_subqueries() { }; } +#[test] +fn parse_create_view() { + let sql = "CREATE VIEW myschema.myview AS SELECT foo FROM bar"; + match verified_stmt(sql) { + SQLStatement::SQLCreateView { name, query } => { + 
assert_eq!("myschema.myview", name.to_string()); + assert_eq!("SELECT foo FROM bar", query.to_string()); + } + _ => assert!(false), + } +} + #[test] fn parse_invalid_subquery_without_parens() { let res = parse_sql_statements("SELECT SELECT 1 FROM bar WHERE 1=1 FROM baz"); From 577e634f3c341dcb2c4252647c073af991d31c39 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Wed, 30 Jan 2019 03:26:54 +0300 Subject: [PATCH 27/45] Update README to the recent changes in the AST --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5d3dd035c..a0966f853 100644 --- a/README.md +++ b/README.md @@ -30,7 +30,7 @@ println!("AST: {:?}", ast); This outputs ```rust -AST: SQLSelect { projection: [SQLIdentifier("a"), SQLIdentifier("b"), SQLLiteralLong(123), SQLFunction { id: "myfunc", args: [SQLIdentifier("b")] }], relation: Some(SQLIdentifier("table_1")), selection: Some(SQLBinaryExpr { left: SQLBinaryExpr { left: SQLIdentifier("a"), op: Gt, right: SQLIdentifier("b") }, op: And, right: SQLBinaryExpr { left: SQLIdentifier("b"), op: Lt, right: SQLLiteralLong(100) } }), order_by: Some([SQLOrderBy { expr: SQLIdentifier("a"), asc: false }, SQLOrderBy { expr: SQLIdentifier("b"), asc: true }]), group_by: None, having: None, limit: None } +AST: [SQLSelect(SQLSelect { projection: [SQLIdentifier("a"), SQLIdentifier("b"), SQLValue(Long(123)), SQLFunction { id: "myfunc", args: [SQLIdentifier("b")] }], relation: Some(Table { name: SQLObjectName(["table_1"]), alias: None }), joins: [], selection: Some(SQLBinaryExpr { left: SQLBinaryExpr { left: SQLIdentifier("a"), op: Gt, right: SQLIdentifier("b") }, op: And, right: SQLBinaryExpr { left: SQLIdentifier("b"), op: Lt, right: SQLValue(Long(100)) } }), order_by: Some([SQLOrderByExpr { expr: SQLIdentifier("a"), asc: Some(false) }, SQLOrderByExpr { expr: SQLIdentifier("b"), asc: None }]), group_by: None, having: None, limit: None })] ``` ## Design From 2e9da53ed3fce705df82defec5944981aae1e74c Mon 
Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Wed, 30 Jan 2019 15:41:50 +0300 Subject: [PATCH 28/45] Small CLI app that can be used to test parsing an external SQL file --- Cargo.toml | 3 +++ examples/cli.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) create mode 100644 examples/cli.rs diff --git a/Cargo.toml b/Cargo.toml index 7106111ac..99e49b174 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,3 +21,6 @@ path = "src/lib.rs" log = "0.4.5" chrono = "0.4.6" uuid = "0.7.1" + +[dev-dependencies] +simple_logger = "1.0.1" diff --git a/examples/cli.rs b/examples/cli.rs new file mode 100644 index 000000000..545e8218f --- /dev/null +++ b/examples/cli.rs @@ -0,0 +1,46 @@ +extern crate simple_logger; +extern crate sqlparser; +///! A small command-line app to run the parser. +/// Run with `cargo run --example cli` +use std::fs; + +use sqlparser::dialect::GenericSqlDialect; +use sqlparser::sqlparser::Parser; + +fn main() { + simple_logger::init().unwrap(); + + let filename = std::env::args() + .nth(1) + .expect("No arguments provided!\n\nUsage: cargo run --example cli FILENAME.sql"); + + let contents = + fs::read_to_string(&filename).expect(&format!("Unable to read the file {}", &filename)); + let without_bom = if contents.chars().nth(0).unwrap() as u64 != 0xfeff { + contents.as_str() + } else { + let mut chars = contents.chars(); + chars.next(); + chars.as_str() + }; + println!("Input:\n'{}'", &without_bom); + let parse_result = Parser::parse_sql(&GenericSqlDialect {}, without_bom.to_owned()); + match parse_result { + Ok(statements) => { + println!( + "Round-trip:\n'{}'", + statements + .iter() + .map(|s| s.to_string()) + .collect::>() + .join("\n") + ); + println!("Parse results:\n{:#?}", statements); + std::process::exit(0); + } + Err(e) => { + println!("Error during parsing: {:?}", e); + std::process::exit(1); + } + } +} From b3693bfa63f14dff5e4c60485e85f06d37a7fb14 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 
31 Jan 2019 13:11:17 +0300 Subject: [PATCH 29/45] Simplify quoted identifier tokenization --- src/sqltokenizer.rs | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs index 0095e5058..f8e7ef8fd 100644 --- a/src/sqltokenizer.rs +++ b/src/sqltokenizer.rs @@ -290,23 +290,17 @@ impl<'a> Tokenizer<'a> { } Ok(Some(Token::SingleQuotedString(s))) } - // string + // delimited (quoted) identifier '"' => { let mut s = String::new(); - chars.next(); // consume - while let Some(&ch) = chars.peek() { + let quote_start = chars.next().unwrap(); // consumes the opening quote + while let Some(ch) = chars.next() { match ch { - '"' => { - chars.next(); // consume - break; - } - _ => { - chars.next(); // consume - s.push(ch); - } + '"' => break, + _ => s.push(ch), } } - Ok(Some(Token::make_word(&s, Some('"')))) + Ok(Some(Token::make_word(&s, Some(quote_start)))) } // numbers '0'...'9' => { From b9f4b503b669930ece92b55fa02a0c5550e66515 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 31 Jan 2019 13:36:40 +0300 Subject: [PATCH 30/45] Support different quoting styles for delimited identifiers The dialect information is from https://en.wikibooks.org/wiki/SQL_Dialects_Reference/Data_structure_definition/Delimited_identifiers --- src/dialect/mod.rs | 12 ++++++++++-- src/sqltokenizer.rs | 22 +++++++++++++++++----- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 3298a1de3..95ecf7924 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -8,8 +8,16 @@ pub use self::generic_sql::GenericSqlDialect; pub use self::postgresql::PostgreSqlDialect; pub trait Dialect { - /// Determine if a character is a valid identifier start character + /// Determine if a character starts a quoted identifier. 
The default + /// implementation, accepting "double quoted" ids is both ANSI-compliant + /// and appropriate for most dialects (with the notable exception of + /// MySQL, MS SQL, and sqlite). You can accept one of characters listed + /// in `SQLWord::matching_end_quote()` here + fn is_delimited_identifier_start(&self, ch: char) -> bool { + ch == '"' + } + /// Determine if a character is a valid start character for an unquoted identifier fn is_identifier_start(&self, ch: char) -> bool; - /// Determine if a character is a valid identifier character + /// Determine if a character is a valid unquoted identifier character fn is_identifier_part(&self, ch: char) -> bool; } diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs index f8e7ef8fd..602db561e 100644 --- a/src/sqltokenizer.rs +++ b/src/sqltokenizer.rs @@ -163,13 +163,24 @@ pub struct SQLWord { impl ToString for SQLWord { fn to_string(&self) -> String { match self.quote_style { - Some('"') => format!("\"{}\"", self.value), - Some('[') => format!("[{}]", self.value), + Some(s) if s == '"' || s == '[' || s == '`' => { + format!("{}{}{}", s, self.value, SQLWord::matching_end_quote(s)) + } None => self.value.clone(), _ => panic!("Unexpected quote_style!"), } } } +impl SQLWord { + fn matching_end_quote(ch: char) -> char { + match ch { + '"' => '"', // ANSI and most dialects + '[' => ']', // MS SQL + '`' => '`', // MySQL + _ => panic!("unexpected quoting style!"), + } + } +} #[derive(Debug, Clone, PartialEq)] pub enum Whitespace { @@ -291,12 +302,13 @@ impl<'a> Tokenizer<'a> { Ok(Some(Token::SingleQuotedString(s))) } // delimited (quoted) identifier - '"' => { + quote_start if self.dialect.is_delimited_identifier_start(quote_start) => { let mut s = String::new(); - let quote_start = chars.next().unwrap(); // consumes the opening quote + chars.next(); // consume the opening quote + let quote_end = SQLWord::matching_end_quote(quote_start); while let Some(ch) = chars.next() { match ch { - '"' => break, + c if c == 
quote_end => break, _ => s.push(ch), } } From 35dd9342e2fa318be60b2dc5fb29d43eeb07680d Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 3 Feb 2019 05:49:15 +0300 Subject: [PATCH 31/45] Support national string literals (N'...') Widely used in MS SQL and specified in ANSI. --- src/sqlast/value.rs | 3 ++ src/sqlparser.rs | 11 +++-- src/sqltokenizer.rs | 87 ++++++++++++++++++++++++++------------ tests/sqlparser_generic.rs | 8 +++- 4 files changed, 76 insertions(+), 33 deletions(-) diff --git a/src/sqlast/value.rs b/src/sqlast/value.rs index ec11b17e0..a061080a8 100644 --- a/src/sqlast/value.rs +++ b/src/sqlast/value.rs @@ -13,6 +13,8 @@ pub enum Value { Uuid(Uuid), /// 'string value' SingleQuotedString(String), + /// N'string value' + NationalStringLiteral(String), /// Boolean value true or false, Boolean(bool), /// Date value @@ -34,6 +36,7 @@ impl ToString for Value { Value::Double(v) => v.to_string(), Value::Uuid(v) => v.to_string(), Value::SingleQuotedString(v) => format!("'{}'", v), + Value::NationalStringLiteral(v) => format!("N'{}'", v), Value::Boolean(v) => v.to_string(), Value::Date(v) => v.to_string(), Value::Time(v) => v.to_string(), diff --git a/src/sqlparser.rs b/src/sqlparser.rs index b6325b10f..a6784aa1d 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -191,7 +191,9 @@ impl Parser { }, }, Token::Mult => Ok(ASTNode::SQLWildcard), - Token::Number(_) | Token::SingleQuotedString(_) => { + Token::Number(_) + | Token::SingleQuotedString(_) + | Token::NationalStringLiteral(_) => { self.prev_token(); self.parse_sql_value() } @@ -205,7 +207,7 @@ impl Parser { Ok(expr) } _ => parser_err!(format!( - "Prefix parser expected a keyword but found {:?}", + "Did not expect {:?} at the beginning of an expression", t )), }, @@ -790,7 +792,10 @@ impl Parser { Token::SingleQuotedString(ref s) => { Ok(Value::SingleQuotedString(s.to_string())) } - _ => parser_err!(format!("Unsupported value: {:?}", self.peek_token())), + Token::NationalStringLiteral(ref s) 
=> { + Ok(Value::NationalStringLiteral(s.to_string())) + } + _ => parser_err!(format!("Unsupported value: {:?}", t)), } } None => parser_err!("Expecting a value, but found EOF"), diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs index 602db561e..aec177d72 100644 --- a/src/sqltokenizer.rs +++ b/src/sqltokenizer.rs @@ -35,6 +35,8 @@ pub enum Token { Char(char), /// Single quoted string: i.e: 'string' SingleQuotedString(String), + /// "National" string literal: i.e: N'string' + NationalStringLiteral(String), /// Comma Comma, /// Whitespace (space, tab, etc) @@ -94,6 +96,7 @@ impl ToString for Token { Token::Number(ref n) => n.to_string(), Token::Char(ref c) => c.to_string(), Token::SingleQuotedString(ref s) => format!("'{}'", s), + Token::NationalStringLiteral(ref s) => format!("N'{}'", s), Token::Comma => ",".to_string(), Token::Whitespace(ws) => ws.to_string(), Token::Eq => "=".to_string(), @@ -265,40 +268,30 @@ impl<'a> Tokenizer<'a> { chars.next(); Ok(Some(Token::Whitespace(Whitespace::Newline))) } - // identifier or keyword - ch if self.dialect.is_identifier_start(ch) => { - let mut s = String::new(); - chars.next(); // consume - s.push(ch); - while let Some(&ch) = chars.peek() { - if self.dialect.is_identifier_part(ch) { - chars.next(); // consume - s.push(ch); - } else { - break; + 'N' => { + chars.next(); // consume, to check the next char + match chars.peek() { + Some('\'') => { + // N'...' 
- a + let s = self.tokenize_single_quoted_string(chars); + Ok(Some(Token::NationalStringLiteral(s))) + } + _ => { + // regular identifier starting with an "N" + let s = self.tokenize_word('N', chars); + Ok(Some(Token::make_word(&s, None))) } } + } + // identifier or keyword + ch if self.dialect.is_identifier_start(ch) => { + chars.next(); // consume the first char + let s = self.tokenize_word(ch, chars); Ok(Some(Token::make_word(&s, None))) } // string '\'' => { - //TODO: handle escaped quotes in string - //TODO: handle newlines in string - //TODO: handle EOF before terminating quote - let mut s = String::new(); - chars.next(); // consume - while let Some(&ch) = chars.peek() { - match ch { - '\'' => { - chars.next(); // consume - break; - } - _ => { - chars.next(); // consume - s.push(ch); - } - } - } + let s = self.tokenize_single_quoted_string(chars); Ok(Some(Token::SingleQuotedString(s))) } // delimited (quoted) identifier @@ -403,6 +396,44 @@ impl<'a> Tokenizer<'a> { } } + /// Tokenize an identifier or keyword, after the first char is already consumed. + fn tokenize_word(&self, first_char: char, chars: &mut Peekable) -> String { + let mut s = String::new(); + s.push(first_char); + while let Some(&ch) = chars.peek() { + if self.dialect.is_identifier_part(ch) { + chars.next(); // consume + s.push(ch); + } else { + break; + } + } + s + } + + /// Read a single quoted string, starting with the opening quote. 
+ fn tokenize_single_quoted_string(&self, chars: &mut Peekable) -> String { + //TODO: handle escaped quotes in string + //TODO: handle newlines in string + //TODO: handle EOF before terminating quote + //TODO: handle 'string' 'string continuation' + let mut s = String::new(); + chars.next(); // consume the opening quote + while let Some(&ch) = chars.peek() { + match ch { + '\'' => { + chars.next(); // consume + break; + } + _ => { + chars.next(); // consume + s.push(ch); + } + } + } + s + } + fn consume_and_return( &self, chars: &mut Peekable, diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index bb2778aaf..9784eed74 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -368,13 +368,17 @@ fn parse_aggregate_with_group_by() { #[test] fn parse_literal_string() { - let sql = "SELECT 'one'"; + let sql = "SELECT 'one', N'national string'"; let select = verified_only_select(sql); - assert_eq!(1, select.projection.len()); + assert_eq!(2, select.projection.len()); assert_eq!( &ASTNode::SQLValue(Value::SingleQuotedString("one".to_string())), expr_from_projection(&select.projection[0]) ); + assert_eq!( + &ASTNode::SQLValue(Value::NationalStringLiteral("national string".to_string())), + expr_from_projection(&select.projection[1]) + ); } #[test] From 028c613c3f4253a5ff9cb4a67380718a1b4969f6 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 3 Feb 2019 06:23:28 +0300 Subject: [PATCH 32/45] Support comments in the tokenizer --- src/sqltokenizer.rs | 120 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 118 insertions(+), 2 deletions(-) diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs index aec177d72..c5cd7ecd1 100644 --- a/src/sqltokenizer.rs +++ b/src/sqltokenizer.rs @@ -190,6 +190,8 @@ pub enum Whitespace { Space, Newline, Tab, + SingleLineComment(String), + MultiLineComment(String), } impl ToString for Whitespace { @@ -198,6 +200,8 @@ impl ToString for Whitespace { Whitespace::Space => " ".to_string(), 
Whitespace::Newline => "\n".to_string(), Whitespace::Tab => "\t".to_string(), + Whitespace::SingleLineComment(s) => format!("--{}", s), + Whitespace::MultiLineComment(s) => format!("/*{}*/", s), } } } @@ -326,10 +330,45 @@ impl<'a> Tokenizer<'a> { ')' => self.consume_and_return(chars, Token::RParen), ',' => self.consume_and_return(chars, Token::Comma), // operators + '-' => { + chars.next(); // consume the '-' + match chars.peek() { + Some('-') => { + chars.next(); // consume the second '-', starting a single-line comment + let mut s = String::new(); + loop { + match chars.next() { + Some(ch) if ch != '\n' => { + s.push(ch); + } + other => { + if other.is_some() { + s.push('\n'); + } + break Ok(Some(Token::Whitespace( + Whitespace::SingleLineComment(s), + ))); + } + } + } + } + // a regular '-' operator + _ => Ok(Some(Token::Minus)), + } + } + '/' => { + chars.next(); // consume the '/' + match chars.peek() { + Some('*') => { + chars.next(); // consume the '*', starting a multi-line comment + self.tokenize_multiline_comment(chars) + } + // a regular '/' operator + _ => Ok(Some(Token::Div)), + } + } '+' => self.consume_and_return(chars, Token::Plus), - '-' => self.consume_and_return(chars, Token::Minus), '*' => self.consume_and_return(chars, Token::Mult), - '/' => self.consume_and_return(chars, Token::Div), '%' => self.consume_and_return(chars, Token::Mod), '=' => self.consume_and_return(chars, Token::Eq), '.' 
=> self.consume_and_return(chars, Token::Period), @@ -434,6 +473,36 @@ impl<'a> Tokenizer<'a> { s } + fn tokenize_multiline_comment( + &self, + chars: &mut Peekable, + ) -> Result, TokenizerError> { + let mut s = String::new(); + let mut maybe_closing_comment = false; + // TODO: deal with nested comments + loop { + match chars.next() { + Some(ch) if maybe_closing_comment && ch == '/' => { + break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s)))); + } + Some(ch) if maybe_closing_comment && ch != '/' => { + maybe_closing_comment = false; + s.push('*'); + s.push(ch); + } + Some(ch) if !maybe_closing_comment && ch == '*' => { + maybe_closing_comment = true; + } + Some(ch) => s.push(ch), + None => { + break Err(TokenizerError( + "Unexpected EOF while in a multi-line comment".to_string(), + )); + } + } + } + } + fn consume_and_return( &self, chars: &mut Peekable, @@ -611,6 +680,53 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_comment() { + let sql = String::from("0--this is a comment\n1"); + + let dialect = GenericSqlDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = tokenizer.tokenize().unwrap(); + let expected = vec![ + Token::Number("0".to_string()), + Token::Whitespace(Whitespace::SingleLineComment( + "this is a comment\n".to_string(), + )), + Token::Number("1".to_string()), + ]; + compare(expected, tokens); + } + + #[test] + fn tokenize_comment_at_eof() { + let sql = String::from("--this is a comment"); + + let dialect = GenericSqlDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = tokenizer.tokenize().unwrap(); + let expected = vec![Token::Whitespace(Whitespace::SingleLineComment( + "this is a comment".to_string(), + ))]; + compare(expected, tokens); + } + + #[test] + fn tokenize_multiline_comment() { + let sql = String::from("0/*multi-line\n* /comment*/1"); + + let dialect = GenericSqlDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = 
tokenizer.tokenize().unwrap(); + let expected = vec![ + Token::Number("0".to_string()), + Token::Whitespace(Whitespace::MultiLineComment( + "multi-line\n* /comment".to_string(), + )), + Token::Number("1".to_string()), + ]; + compare(expected, tokens); + } + fn compare(expected: Vec, actual: Vec) { //println!("------------------------------"); //println!("tokens = {:?}", actual); From f958e9d3cf9cd2a8b8264dd9e93cf3bbe5c5cccd Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Mon, 11 Feb 2019 05:10:53 +0300 Subject: [PATCH 33/45] TBD fixup multiline comment tokenization --- src/sqltokenizer.rs | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs index c5cd7ecd1..83105736c 100644 --- a/src/sqltokenizer.rs +++ b/src/sqltokenizer.rs @@ -482,18 +482,19 @@ impl<'a> Tokenizer<'a> { // TODO: deal with nested comments loop { match chars.next() { - Some(ch) if maybe_closing_comment && ch == '/' => { - break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s)))); - } - Some(ch) if maybe_closing_comment && ch != '/' => { - maybe_closing_comment = false; - s.push('*'); - s.push(ch); - } - Some(ch) if !maybe_closing_comment && ch == '*' => { - maybe_closing_comment = true; + Some(ch) => { + if maybe_closing_comment { + if ch == '/' { + break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s)))); + } else { + s.push('*'); + } + } + maybe_closing_comment = ch == '*'; + if !maybe_closing_comment { + s.push(ch); + } } - Some(ch) => s.push(ch), None => { break Err(TokenizerError( "Unexpected EOF while in a multi-line comment".to_string(), @@ -727,6 +728,21 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_multiline_comment_with_even_asterisks() { + let sql = String::from("\n/** Comment **/\n"); + + let dialect = GenericSqlDialect {}; + let mut tokenizer = Tokenizer::new(&dialect, &sql); + let tokens = tokenizer.tokenize().unwrap(); + let expected = vec![ 
+ Token::Whitespace(Whitespace::Newline), + Token::Whitespace(Whitespace::MultiLineComment("* Comment *".to_string())), + Token::Whitespace(Whitespace::Newline), + ]; + compare(expected, tokens); + } + fn compare(expected: Vec, actual: Vec) { //println!("------------------------------"); //println!("tokens = {:?}", actual); From bf0c07bb1bfcfafec42e07cc6f4acd2b0eb5818f Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 3 Feb 2019 06:40:17 +0300 Subject: [PATCH 34/45] Support basic CTEs (`WITH`) Some unsupported features are noted as TODOs. --- src/sqlast/mod.rs | 10 +-- src/sqlast/query.rs | 74 +++++++++++++++------ src/sqlparser.rs | 128 ++++++++++++++++++++++++------------- tests/sqlparser_ansi.rs | 5 +- tests/sqlparser_generic.rs | 91 +++++++++++++++++++------- 5 files changed, 217 insertions(+), 91 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index b343b837f..f77058b3f 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -20,7 +20,9 @@ mod sqltype; mod table_key; mod value; -pub use self::query::{Join, JoinConstraint, JoinOperator, SQLOrderByExpr, SQLSelect, TableFactor}; +pub use self::query::{ + Cte, Join, JoinConstraint, JoinOperator, SQLOrderByExpr, SQLQuery, SQLSelect, TableFactor, +}; pub use self::sqltype::SQLType; pub use self::table_key::{AlterOperation, Key, TableKey}; pub use self::value::Value; @@ -76,7 +78,7 @@ pub enum ASTNode { }, /// A parenthesized subquery `(SELECT ...)`, used in expression like /// `SELECT (subquery) AS x` or `WHERE (subquery) = x` - SQLSubquery(Box), + SQLSubquery(Box), } impl ToString for ASTNode { @@ -139,7 +141,7 @@ impl ToString for ASTNode { #[derive(Debug, Clone, PartialEq)] pub enum SQLStatement { /// SELECT - SQLSelect(SQLSelect), + SQLSelect(SQLQuery), /// INSERT SQLInsert { /// TABLE @@ -177,7 +179,7 @@ pub enum SQLStatement { SQLCreateView { /// View name name: SQLObjectName, - query: SQLSelect, + query: SQLQuery, }, /// CREATE TABLE SQLCreateTable { diff --git 
a/src/sqlast/query.rs b/src/sqlast/query.rs index 887042f96..28f221bd3 100644 --- a/src/sqlast/query.rs +++ b/src/sqlast/query.rs @@ -1,5 +1,53 @@ use super::*; +/// The most complete variant of a `SELECT` query expression, optionally +/// including `WITH`, `UNION` / other set operations, and `ORDER BY`. +#[derive(Debug, Clone, PartialEq)] +pub struct SQLQuery { + /// WITH (common table expressions, or CTEs) + pub ctes: Vec, + /// SELECT or UNION / EXCEPT / INTECEPT + pub body: SQLSelect, + /// ORDER BY + pub order_by: Option>, + /// LIMIT + pub limit: Option, +} + +impl ToString for SQLQuery { + fn to_string(&self) -> String { + let mut s = String::new(); + if !self.ctes.is_empty() { + s += &format!( + "WITH {} ", + self.ctes + .iter() + .map(|cte| format!("{} AS ({})", cte.alias, cte.query.to_string())) + .collect::>() + .join(", ") + ) + } + s += &self.body.to_string(); + if let Some(ref order_by) = self.order_by { + s += &format!( + " ORDER BY {}", + order_by + .iter() + .map(|o| o.to_string()) + .collect::>() + .join(", ") + ); + } + if let Some(ref limit) = self.limit { + s += &format!(" LIMIT {}", limit.to_string()); + } + s + } +} + +/// A restricted variant of `SELECT` (without CTEs/`ORDER BY`), which may +/// appear either as the only body item of an `SQLQuery`, or as an operand +/// to a set operation like `UNION`. 
#[derive(Debug, Clone, PartialEq)] pub struct SQLSelect { /// projection expressions @@ -10,14 +58,10 @@ pub struct SQLSelect { pub joins: Vec, /// WHERE pub selection: Option, - /// ORDER BY - pub order_by: Option>, /// GROUP BY pub group_by: Option>, /// HAVING pub having: Option, - /// LIMIT - pub limit: Option, } impl ToString for SQLSelect { @@ -52,23 +96,17 @@ impl ToString for SQLSelect { if let Some(ref having) = self.having { s += &format!(" HAVING {}", having.to_string()); } - if let Some(ref order_by) = self.order_by { - s += &format!( - " ORDER BY {}", - order_by - .iter() - .map(|o| o.to_string()) - .collect::>() - .join(", ") - ); - } - if let Some(ref limit) = self.limit { - s += &format!(" LIMIT {}", limit.to_string()); - } s } } +/// A single CTE (used after `WITH`): `alias AS ( query )` +#[derive(Debug, Clone, PartialEq)] +pub struct Cte { + pub alias: SQLIdent, + pub query: SQLQuery, +} + /// A table name or a parenthesized subquery with an optional alias #[derive(Debug, Clone, PartialEq)] pub enum TableFactor { @@ -77,7 +115,7 @@ pub enum TableFactor { alias: Option, }, Derived { - subquery: Box, + subquery: Box, alias: Option, }, } diff --git a/src/sqlparser.rs b/src/sqlparser.rs index a6784aa1d..d04be6e0d 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -88,7 +88,10 @@ impl Parser { match self.next_token() { Some(t) => match t { Token::SQLWord(ref w) if w.keyword != "" => match w.keyword.as_ref() { - "SELECT" => Ok(SQLStatement::SQLSelect(self.parse_select()?)), + "SELECT" | "WITH" => { + self.prev_token(); + Ok(SQLStatement::SQLSelect(self.parse_query()?)) + } "CREATE" => Ok(self.parse_create()?), "DELETE" => Ok(self.parse_delete()?), "INSERT" => Ok(self.parse_insert()?), @@ -198,8 +201,9 @@ impl Parser { self.parse_sql_value() } Token::LParen => { - let expr = if self.parse_keyword("SELECT") { - ASTNode::SQLSubquery(Box::new(self.parse_select()?)) + let expr = if self.parse_keyword("SELECT") || self.parse_keyword("WITH") { + 
self.prev_token(); + ASTNode::SQLSubquery(Box::new(self.parse_query()?)) } else { ASTNode::SQLNested(Box::new(self.parse_expr()?)) }; @@ -568,8 +572,7 @@ impl Parser { // Some dialects allow WITH here, followed by some keywords (e.g. MS SQL) // or `(k1=v1, k2=v2, ...)` (Postgres) self.expect_keyword("AS")?; - self.expect_keyword("SELECT")?; - let query = self.parse_select()?; + let query = self.parse_query()?; // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. Ok(SQLStatement::SQLCreateView { name, query }) } @@ -673,18 +676,9 @@ impl Parser { let table_name = self.parse_object_name()?; let operation: Result = if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) { - match self.next_token() { - Some(Token::SQLWord(ref id)) => { - let table_key = self.parse_table_key(id.as_sql_ident())?; - Ok(AlterOperation::AddConstraint(table_key)) - } - _ => { - return parser_err!(format!( - "Expecting identifier, found : {:?}", - self.peek_token() - )); - } - } + let constraint_name = self.parse_identifier()?; + let table_key = self.parse_table_key(constraint_name)?; + Ok(AlterOperation::AddConstraint(table_key)) } else { return parser_err!(format!( "Expecting ADD CONSTRAINT, found :{:?}", @@ -1079,6 +1073,14 @@ impl Parser { Ok(SQLObjectName(self.parse_list_of_ids(&Token::Period)?)) } + /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) + pub fn parse_identifier(&mut self) -> Result { + match self.next_token() { + Some(Token::SQLWord(w)) => Ok(w.as_sql_ident()), + unexpected => parser_err!(format!("Expected identifier, found {:?}", unexpected)), + } + } + /// Parse a comma-separated list of unqualified, possibly quoted identifiers pub fn parse_column_names(&mut self) -> Result, ParserError> { Ok(self.parse_list_of_ids(&Token::Comma)?) @@ -1132,7 +1134,64 @@ impl Parser { }) } - /// Parse a SELECT statement + /// Parse a query expression, i.e. 
a `SELECT` statement optionally + /// preceeded with some `WITH` CTE declarations and optionally followed + /// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't + /// expect the initial keyword to be already consumed + pub fn parse_query(&mut self) -> Result { + let ctes = if self.parse_keyword("WITH") { + // TODO: optional RECURSIVE + self.parse_cte_list()? + } else { + vec![] + }; + + self.expect_keyword("SELECT")?; + let body = self.parse_select()?; + + let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) { + Some(self.parse_order_by_expr_list()?) + } else { + None + }; + + let limit = if self.parse_keyword("LIMIT") { + self.parse_limit()? + } else { + None + }; + + Ok(SQLQuery { + ctes, + body, + limit, + order_by, + }) + } + + /// Parse one or more (comma-separated) `alias AS (subquery)` CTEs, + /// assuming the initial `WITH` was already consumed. + fn parse_cte_list(&mut self) -> Result, ParserError> { + let mut cte = vec![]; + loop { + let alias = self.parse_identifier()?; + // TODO: Optional `( )` + self.expect_keyword("AS")?; + self.expect_token(&Token::LParen)?; + cte.push(Cte { + alias, + query: self.parse_query()?, + }); + self.expect_token(&Token::RParen)?; + if !self.consume_token(&Token::Comma) { + break; + } + } + return Ok(cte); + } + + /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`), + /// assuming the initial `SELECT` was already consumed pub fn parse_select(&mut self) -> Result { let projection = self.parse_expr_list()?; @@ -1145,8 +1204,7 @@ impl Parser { }; let selection = if self.parse_keyword("WHERE") { - let expr = self.parse_expr()?; - Some(expr) + Some(self.parse_expr()?) } else { None }; @@ -1163,25 +1221,11 @@ impl Parser { None }; - let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) { - Some(self.parse_order_by_expr_list()?) - } else { - None - }; - - let limit = if self.parse_keyword("LIMIT") { - self.parse_limit()? 
- } else { - None - }; - Ok(SQLSelect { projection, selection, relation, joins, - limit, - order_by, group_by, having, }) @@ -1190,18 +1234,14 @@ impl Parser { /// A table name or a parenthesized subquery, followed by optional `[AS] alias` pub fn parse_table_factor(&mut self) -> Result { if self.consume_token(&Token::LParen) { - self.expect_keyword("SELECT")?; - let subquery = self.parse_select()?; + let subquery = Box::new(self.parse_query()?); self.expect_token(&Token::RParen)?; - Ok(TableFactor::Derived { - subquery: Box::new(subquery), - alias: self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?, - }) + let alias = self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Derived { subquery, alias }) } else { - Ok(TableFactor::Table { - name: self.parse_object_name()?, - alias: self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?, - }) + let name = self.parse_object_name()?; + let alias = self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Table { name, alias }) } } diff --git a/tests/sqlparser_ansi.rs b/tests/sqlparser_ansi.rs index 7e4900f5c..871046b1a 100644 --- a/tests/sqlparser_ansi.rs +++ b/tests/sqlparser_ansi.rs @@ -11,7 +11,10 @@ fn parse_simple_select() { let ast = Parser::parse_sql(&AnsiSqlDialect {}, sql).unwrap(); assert_eq!(1, ast.len()); match ast.first().unwrap() { - SQLStatement::SQLSelect(SQLSelect { projection, .. }) => { + SQLStatement::SQLSelect(SQLQuery { + body: SQLSelect { projection, .. }, + .. + }) => { assert_eq!(3, projection.len()); } _ => assert!(false), diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 9784eed74..e99f04607 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -57,32 +57,22 @@ fn parse_simple_select() { #[test] fn parse_select_wildcard() { - let sql = String::from("SELECT * FROM customer"); - match verified_stmt(&sql) { - SQLStatement::SQLSelect(SQLSelect { projection, .. 
}) => { - assert_eq!(1, projection.len()); - assert_eq!(ASTNode::SQLWildcard, projection[0]); - } - _ => assert!(false), - } + let sql = "SELECT * FROM foo"; + let select = verified_only_select(sql); + assert_eq!(&ASTNode::SQLWildcard, only(&select.projection)); } #[test] fn parse_select_count_wildcard() { - let sql = String::from("SELECT COUNT(*) FROM customer"); - match verified_stmt(&sql) { - SQLStatement::SQLSelect(SQLSelect { projection, .. }) => { - assert_eq!(1, projection.len()); - assert_eq!( - ASTNode::SQLFunction { - id: "COUNT".to_string(), - args: vec![ASTNode::SQLWildcard], - }, - projection[0] - ); - } - _ => assert!(false), - } + let sql = "SELECT COUNT(*) FROM customer"; + let select = verified_only_select(sql); + assert_eq!( + &ASTNode::SQLFunction { + id: "COUNT".to_string(), + args: vec![ASTNode::SQLWildcard], + }, + expr_from_projection(only(&select.projection)) + ); } #[test] @@ -652,6 +642,59 @@ fn parse_join_syntax_variants() { ); } +#[test] +fn parse_ctes() { + // To be valid SQL this needs aliases for the derived columns, but + // we don't support them yet in the context of a SELECT's projection. 
+ let cte_sqls = vec!["SELECT 1", "SELECT 2"]; + let with = &format!( + "WITH a AS ({}), b AS ({}) SELECT foo + bar FROM a, b", + cte_sqls[0], cte_sqls[1] + ); + + fn assert_ctes_in_select(expected: &Vec<&str>, sel: &SQLQuery) { + for i in 0..1 { + let Cte { + ref query, + ref alias, + } = sel.ctes[i]; + assert_eq!(expected[i], query.to_string()); + assert_eq!(if i == 0 { "a" } else { "b" }, alias); + } + } + + // Top-level CTE + assert_ctes_in_select(&cte_sqls, &verified_query(with)); + // CTE in a subquery + let sql = &format!("SELECT ({})", with); + let select = verified_only_select(sql); + match expr_from_projection(only(&select.projection)) { + &ASTNode::SQLSubquery(ref subquery) => { + assert_ctes_in_select(&cte_sqls, subquery.as_ref()); + } + _ => panic!("Expected subquery"), + } + // CTE in a derived table + let sql = &format!("SELECT * FROM ({})", with); + let select = verified_only_select(sql); + match select.relation { + Some(TableFactor::Derived { subquery, .. }) => { + assert_ctes_in_select(&cte_sqls, subquery.as_ref()) + } + _ => panic!("Expected derived table"), + } + // CTE in a view + let sql = &format!("CREATE VIEW v AS {}", with); + match verified_stmt(sql) { + SQLStatement::SQLCreateView { query, .. } => assert_ctes_in_select(&cte_sqls, &query), + _ => panic!("Expected CREATE VIEW"), + } + // CTE in a CTE... 
+ let sql = &format!("WITH outer_cte AS ({}) SELECT * FROM outer_cte", with); + let select = verified_query(sql); + assert_ctes_in_select(&cte_sqls, &only(&select.ctes).query); +} + #[test] fn parse_derived_tables() { let sql = "SELECT a.x, b.y FROM (SELECT x FROM foo) AS a CROSS JOIN (SELECT y FROM bar) AS b"; @@ -730,7 +773,7 @@ fn only<'a, T>(v: &'a Vec) -> &'a T { v.first().unwrap() } -fn verified_query(query: &str) -> SQLSelect { +fn verified_query(query: &str) -> SQLQuery { match verified_stmt(query) { SQLStatement::SQLSelect(select) => select, _ => panic!("Expected SELECT"), @@ -742,7 +785,7 @@ fn expr_from_projection(item: &ASTNode) -> &ASTNode { } fn verified_only_select(query: &str) -> SQLSelect { - verified_query(query) + verified_query(query).body } fn verified_stmt(query: &str) -> SQLStatement { From bed03abe44035c4b3d8e9f5d6acafcddd1a90463 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Sun, 3 Feb 2019 06:40:17 +0300 Subject: [PATCH 35/45] Support `AS` and qualified wildcards in SELECT --- src/dialect/keywords.rs | 19 ++++++++-- src/sqlast/mod.rs | 11 +++++- src/sqlast/query.rs | 28 +++++++++++++- src/sqlparser.rs | 51 ++++++++++++++++++++----- tests/sqlparser_generic.rs | 78 ++++++++++++++++++++++++++++++++------ 5 files changed, 158 insertions(+), 29 deletions(-) diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index e1495e56d..b3dd1e27c 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -715,8 +715,19 @@ pub const ALL_KEYWORDS: &'static [&'static str] = &[ /// These keywords can't be used as a table alias, so that `FROM table_name alias` /// can be parsed unambiguously without looking ahead. pub const RESERVED_FOR_TABLE_ALIAS: &'static [&'static str] = &[ - WHERE, GROUP, ON, // keyword is 'reserved' in most dialects - JOIN, INNER, CROSS, FULL, LEFT, RIGHT, // not reserved in Oracle - NATURAL, USING, // not reserved in Oracle & MSSQL - ORDER, // UNION, EXCEPT, INTERSECT, // TODO add these with tests. 
+ // Reserved as both a table and a column alias: + WITH, SELECT, WHERE, GROUP, ORDER, + // TODO add these with tests: UNION, EXCEPT, INTERSECT, + // Reserved only as a table alias in the `FROM`/`JOIN` clauses: + ON, JOIN, INNER, CROSS, FULL, LEFT, RIGHT, NATURAL, USING, +]; + +/// Can't be used as a column alias, so that `SELECT alias` +/// can be parsed unambiguously without looking ahead. +pub const RESERVED_FOR_COLUMN_ALIAS: &'static [&'static str] = &[ + // Reserved as both a table and a column alias: + WITH, SELECT, WHERE, GROUP, ORDER, + // TODO add these with tests: UNION, EXCEPT, INTERSECT, + // Reserved only as a column alias in the `SELECT` clause: + FROM, ]; diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index f77058b3f..2563b801a 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -21,7 +21,8 @@ mod table_key; mod value; pub use self::query::{ - Cte, Join, JoinConstraint, JoinOperator, SQLOrderByExpr, SQLQuery, SQLSelect, TableFactor, + Cte, Join, JoinConstraint, JoinOperator, SQLOrderByExpr, SQLQuery, SQLSelect, SQLSelectItem, + TableFactor, }; pub use self::sqltype::SQLType; pub use self::table_key::{AlterOperation, Key, TableKey}; @@ -38,8 +39,13 @@ pub type SQLIdent = String; pub enum ASTNode { /// Identifier e.g. table name or column name SQLIdentifier(SQLIdent), - /// Wildcard e.g. `*` + /// Unqualified wildcard (`*`). SQL allows this in limited contexts (such as right + /// after `SELECT` or as part of an aggregate function, e.g. `COUNT(*)`, but we + /// currently accept it in contexts where it doesn't make sense, such as `* + *` SQLWildcard, + /// Qualified wildcard, e.g. `alias.*` or `schema.table.*`. + /// (Same caveats apply to SQLQualifiedWildcard as to SQLWildcard.) + SQLQualifiedWildcard(Vec), /// Multi part identifier e.g. 
`myschema.dbo.mytable` SQLCompoundIdentifier(Vec), /// `IS NULL` expression @@ -86,6 +92,7 @@ impl ToString for ASTNode { match self { ASTNode::SQLIdentifier(s) => s.to_string(), ASTNode::SQLWildcard => "*".to_string(), + ASTNode::SQLQualifiedWildcard(q) => q.join(".") + "*", ASTNode::SQLCompoundIdentifier(s) => s.join("."), ASTNode::SQLIsNull(ast) => format!("{} IS NULL", ast.as_ref().to_string()), ASTNode::SQLIsNotNull(ast) => format!("{} IS NOT NULL", ast.as_ref().to_string()), diff --git a/src/sqlast/query.rs b/src/sqlast/query.rs index 28f221bd3..9ccf56028 100644 --- a/src/sqlast/query.rs +++ b/src/sqlast/query.rs @@ -51,7 +51,7 @@ impl ToString for SQLQuery { #[derive(Debug, Clone, PartialEq)] pub struct SQLSelect { /// projection expressions - pub projection: Vec, + pub projection: Vec, /// FROM pub relation: Option, /// JOIN @@ -107,6 +107,32 @@ pub struct Cte { pub query: SQLQuery, } +/// One item of the comma-separated list following `SELECT` +#[derive(Debug, Clone, PartialEq)] +pub enum SQLSelectItem { + /// Any expression, not followed by `[ AS ] alias` + UnnamedExpression(ASTNode), + /// An expression, followed by `[ AS ] alias` + ExpressionWithAlias(ASTNode, SQLIdent), + /// `alias.*` or even `schema.table.*` + QualifiedWildcard(SQLObjectName), + /// An unqualified `*` + Wildcard, +} + +impl ToString for SQLSelectItem { + fn to_string(&self) -> String { + match &self { + SQLSelectItem::UnnamedExpression(expr) => expr.to_string(), + SQLSelectItem::ExpressionWithAlias(expr, alias) => { + format!("{} AS {}", expr.to_string(), alias) + } + SQLSelectItem::QualifiedWildcard(prefix) => format!("{}.*", prefix.to_string()), + SQLSelectItem::Wildcard => "*".to_string(), + } + } +} + /// A table name or a parenthesized subquery with an optional alias #[derive(Debug, Clone, PartialEq)] pub enum TableFactor { diff --git a/src/sqlparser.rs b/src/sqlparser.rs index d04be6e0d..8f8c3e541 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -178,9 +178,14 @@ impl 
Parser { Some(Token::LParen) => self.parse_function(w.as_sql_ident()), Some(Token::Period) => { let mut id_parts: Vec = vec![w.as_sql_ident()]; + let mut ends_with_wildcard = false; while self.consume_token(&Token::Period) { match self.next_token() { Some(Token::SQLWord(w)) => id_parts.push(w.as_sql_ident()), + Some(Token::Mult) => { + ends_with_wildcard = true; + break; + } _ => { return parser_err!(format!( "Error parsing compound identifier" @@ -188,7 +193,11 @@ impl Parser { } } } - Ok(ASTNode::SQLCompoundIdentifier(id_parts)) + if ends_with_wildcard { + Ok(ASTNode::SQLQualifiedWildcard(id_parts)) + } else { + Ok(ASTNode::SQLCompoundIdentifier(id_parts)) + } } _ => Ok(ASTNode::SQLIdentifier(w.as_sql_ident())), }, @@ -1193,7 +1202,7 @@ impl Parser { /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`), /// assuming the initial `SELECT` was already consumed pub fn parse_select(&mut self) -> Result { - let projection = self.parse_expr_list()?; + let projection = self.parse_select_list()?; let (relation, joins) = if self.parse_keyword("FROM") { let relation = Some(self.parse_table_factor()?); @@ -1381,18 +1390,40 @@ impl Parser { let mut expr_list: Vec = vec![]; loop { expr_list.push(self.parse_expr()?); - if let Some(t) = self.peek_token() { - if t == Token::Comma { - self.next_token(); + match self.peek_token() { + Some(Token::Comma) => self.next_token(), + _ => break, + }; + } + Ok(expr_list) + } + + /// Parse a comma-delimited list of projections after SELECT + pub fn parse_select_list(&mut self) -> Result, ParserError> { + let mut projections: Vec = vec![]; + loop { + let expr = self.parse_expr()?; + if let ASTNode::SQLWildcard = expr { + projections.push(SQLSelectItem::Wildcard); + } else if let ASTNode::SQLQualifiedWildcard(prefix) = expr { + projections.push(SQLSelectItem::QualifiedWildcard(SQLObjectName(prefix))); + } else { + // `expr` is a regular SQL expression and can be followed by an alias + if let Some(alias) = + 
self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS)? + { + projections.push(SQLSelectItem::ExpressionWithAlias(expr, alias)); } else { - break; + projections.push(SQLSelectItem::UnnamedExpression(expr)); } - } else { - //EOF - break; } + + match self.peek_token() { + Some(Token::Comma) => self.next_token(), + _ => break, + }; } - Ok(expr_list) + Ok(projections) } /// Parse a comma-delimited list of SQL ORDER BY expressions diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index e99f04607..125f4bb7a 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -59,7 +59,46 @@ fn parse_simple_select() { fn parse_select_wildcard() { let sql = "SELECT * FROM foo"; let select = verified_only_select(sql); - assert_eq!(&ASTNode::SQLWildcard, only(&select.projection)); + assert_eq!(&SQLSelectItem::Wildcard, only(&select.projection)); + + let sql = "SELECT foo.* FROM foo"; + let select = verified_only_select(sql); + assert_eq!( + &SQLSelectItem::QualifiedWildcard(SQLObjectName(vec!["foo".to_string()])), + only(&select.projection) + ); + + let sql = "SELECT myschema.mytable.* FROM myschema.mytable"; + let select = verified_only_select(sql); + assert_eq!( + &SQLSelectItem::QualifiedWildcard(SQLObjectName(vec![ + "myschema".to_string(), + "mytable".to_string(), + ])), + only(&select.projection) + ); +} + +#[test] +fn parse_column_aliases() { + let sql = "SELECT a.col + 1 AS newname FROM foo AS a"; + let select = verified_only_select(sql); + if let SQLSelectItem::ExpressionWithAlias( + ASTNode::SQLBinaryExpr { + ref op, ref right, .. 
+ }, + ref alias, + ) = only(&select.projection) + { + assert_eq!(&SQLOperator::Plus, op); + assert_eq!(&ASTNode::SQLValue(Value::Long(1)), right.as_ref()); + assert_eq!("newname", alias); + } else { + panic!("Expected ExpressionWithAlias") + } + + // alias without AS is parsed correctly: + one_statement_parses_to("SELECT a.col + 1 newname FROM foo AS a", &sql); } #[test] @@ -236,6 +275,7 @@ fn parse_select_order_by() { chk("SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC, id"); // make sure ORDER is not treated as an alias chk("SELECT id, fname, lname FROM customer ORDER BY lname ASC, fname DESC, id"); + chk("SELECT 1 AS lname, 2 AS fname, 3 AS id, 4 ORDER BY lname ASC, fname DESC, id"); } #[test] @@ -396,8 +436,9 @@ fn parse_select_version() { #[test] fn parse_delimited_identifiers() { // check that quoted identifiers in any position remain quoted after serialization - let sql = r#"SELECT "alias"."bar baz", "myfun"(), "simple id" FROM "a table" AS "alias""#; - let select = verified_only_select(sql); + let select = verified_only_select( + r#"SELECT "alias"."bar baz", "myfun"(), "simple id" AS "column alias" FROM "a table" AS "alias""# + ); // check FROM match select.relation.unwrap() { TableFactor::Table { name, alias } => { @@ -419,10 +460,13 @@ fn parse_delimited_identifiers() { }, expr_from_projection(&select.projection[1]), ); - assert_eq!( - &ASTNode::SQLIdentifier(r#""simple id""#.to_string()), - expr_from_projection(&select.projection[2]), - ); + match &select.projection[2] { + &SQLSelectItem::ExpressionWithAlias(ref expr, ref alias) => { + assert_eq!(&ASTNode::SQLIdentifier(r#""simple id""#.to_string()), expr); + assert_eq!(r#""column alias""#, alias); + } + _ => panic!("Expected ExpressionWithAlias"), + } verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); verified_stmt(r#"ALTER TABLE foo ADD CONSTRAINT "bar" PRIMARY KEY (baz)"#); @@ -644,9 +688,7 @@ fn parse_join_syntax_variants() { #[test] fn parse_ctes() { - // To be 
valid SQL this needs aliases for the derived columns, but - // we don't support them yet in the context of a SELECT's projection. - let cte_sqls = vec!["SELECT 1", "SELECT 2"]; + let cte_sqls = vec!["SELECT 1 AS foo", "SELECT 2 AS bar"]; let with = &format!( "WITH a AS ({}), b AS ({}) SELECT foo + bar FROM a, b", cte_sqls[0], cte_sqls[1] @@ -725,6 +767,15 @@ fn parse_multiple_statements() { ); } test_with("SELECT foo", "SELECT", " bar"); + // ensure that SELECT/WITH is not parsed as a table or column alias if ';' + // separating the statements is omitted: + test_with("SELECT foo FROM baz", "SELECT", " bar"); + test_with("SELECT foo", "WITH", " cte AS (SELECT 1 AS s) SELECT bar"); + test_with( + "SELECT foo FROM baz", + "WITH", + " cte AS (SELECT 1 AS s) SELECT bar", + ); test_with("DELETE FROM foo", "SELECT", " bar"); test_with("INSERT INTO foo VALUES(1)", "SELECT", " bar"); test_with("CREATE TABLE foo (baz int)", "SELECT", " bar"); @@ -780,8 +831,11 @@ fn verified_query(query: &str) -> SQLQuery { } } -fn expr_from_projection(item: &ASTNode) -> &ASTNode { - item // Will be changed later to extract expression from `expr AS alias` struct +fn expr_from_projection(item: &SQLSelectItem) -> &ASTNode { + match item { + SQLSelectItem::UnnamedExpression(expr) => expr, + _ => panic!("Expected UnnamedExpression"), + } } fn verified_only_select(query: &str) -> SQLSelect { From 264319347dd6382ec9377a411f9c5fbedf5aa207 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Mon, 11 Feb 2019 01:05:44 +0300 Subject: [PATCH 36/45] Support IN --- src/sqlast/mod.rs | 35 +++++++++++++++++++++++ src/sqlparser.rs | 58 ++++++++++++++++++++++++++++---------- tests/sqlparser_generic.rs | 38 +++++++++++++++++++++++++ 3 files changed, 116 insertions(+), 15 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 2563b801a..64dbe98f8 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -52,6 +52,18 @@ pub enum ASTNode { SQLIsNull(Box), /// `IS NOT NULL` expression 
SQLIsNotNull(Box), + /// `[ NOT ] IN (val1, val2, ...)` + SQLInList { + expr: Box, + list: Vec, + negated: bool, + }, + /// `[ NOT ] IN (SELECT ...)` + SQLInSubquery { + expr: Box, + subquery: Box, + negated: bool, + }, /// Binary expression e.g. `1 + 1` or `foo > bar` SQLBinaryExpr { left: Box, @@ -96,6 +108,29 @@ impl ToString for ASTNode { ASTNode::SQLCompoundIdentifier(s) => s.join("."), ASTNode::SQLIsNull(ast) => format!("{} IS NULL", ast.as_ref().to_string()), ASTNode::SQLIsNotNull(ast) => format!("{} IS NOT NULL", ast.as_ref().to_string()), + ASTNode::SQLInList { + expr, + list, + negated, + } => format!( + "{} {}IN ({})", + expr.as_ref().to_string(), + if *negated { "NOT " } else { "" }, + list.iter() + .map(|a| a.to_string()) + .collect::>() + .join(", ") + ), + ASTNode::SQLInSubquery { + expr, + subquery, + negated, + } => format!( + "{} {}IN ({})", + expr.as_ref().to_string(), + if *negated { "NOT " } else { "" }, + subquery.to_string() + ), ASTNode::SQLBinaryExpr { left, op, right } => format!( "{} {} {}", left.as_ref().to_string(), diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 8f8c3e541..e4d9057f2 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -289,14 +289,6 @@ impl Parser { }) } - /// Parse a postgresql casting style which is in the form of `expr::datatype` - pub fn parse_pg_cast(&mut self, expr: ASTNode) -> Result { - Ok(ASTNode::SQLCast { - expr: Box::new(expr), - data_type: self.parse_data_type()?, - }) - } - /// Parse an expression infix (typically an operator) pub fn parse_infix(&mut self, expr: ASTNode, precedence: u8) -> Result { debug!("parsing infix"); @@ -308,24 +300,30 @@ impl Parser { } else if self.parse_keywords(vec!["NOT", "NULL"]) { Ok(ASTNode::SQLIsNotNull(Box::new(expr))) } else { - parser_err!("Invalid tokens after IS") + parser_err!(format!( + "Expected NULL or NOT NULL after IS, found {:?}", + self.peek_token() + )) } } Token::SQLWord(ref k) if k.keyword == "NOT" => { - if self.parse_keywords(vec!["LIKE"]) { + if 
self.parse_keyword("IN") { + self.parse_in(expr, true) + } else if self.parse_keyword("LIKE") { Ok(ASTNode::SQLBinaryExpr { left: Box::new(expr), op: SQLOperator::NotLike, right: Box::new(self.parse_subexpr(precedence)?), }) } else { - parser_err!("Invalid tokens after NOT") + parser_err!(format!( + "Expected IN or LIKE after NOT, found {:?}", + self.peek_token() + )) } } - Token::DoubleColon => { - let pg_cast = self.parse_pg_cast(expr)?; - Ok(pg_cast) - } + Token::SQLWord(ref k) if k.keyword == "IN" => self.parse_in(expr, false), + Token::DoubleColon => self.parse_pg_cast(expr), Token::SQLWord(_) | Token::Eq | Token::Neq @@ -350,6 +348,35 @@ impl Parser { } } + /// Parses the parens following the `[ NOT ] IN` operator + pub fn parse_in(&mut self, expr: ASTNode, negated: bool) -> Result { + self.expect_token(&Token::LParen)?; + let in_op = if self.parse_keyword("SELECT") || self.parse_keyword("WITH") { + self.prev_token(); + ASTNode::SQLInSubquery { + expr: Box::new(expr), + subquery: Box::new(self.parse_query()?), + negated, + } + } else { + ASTNode::SQLInList { + expr: Box::new(expr), + list: self.parse_expr_list()?, + negated, + } + }; + self.expect_token(&Token::RParen)?; + Ok(in_op) + } + + /// Parse a postgresql casting style which is in the form of `expr::datatype` + pub fn parse_pg_cast(&mut self, expr: ASTNode) -> Result { + Ok(ASTNode::SQLCast { + expr: Box::new(expr), + data_type: self.parse_data_type()?, + }) + } + /// Convert a token operator to an AST operator pub fn to_sql_operator(&self, tok: &Token) -> Result { match tok { @@ -390,6 +417,7 @@ impl Parser { &Token::SQLWord(ref k) if k.keyword == "AND" => Ok(10), &Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(15), &Token::SQLWord(ref k) if k.keyword == "IS" => Ok(17), + &Token::SQLWord(ref k) if k.keyword == "IN" => Ok(20), &Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(20), &Token::Eq | &Token::Lt | &Token::LtEq | &Token::Neq | &Token::Gt | &Token::GtEq => { Ok(20) diff --git 
a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 125f4bb7a..81150f642 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -250,6 +250,44 @@ fn parse_not_like() { ); } +#[test] +fn parse_in_list() { + fn chk(negated: bool) { + let sql = &format!( + "SELECT * FROM customers WHERE segment {}IN ('HIGH', 'MED')", + if negated { "NOT " } else { "" } + ); + let select = verified_only_select(sql); + assert_eq!( + ASTNode::SQLInList { + expr: Box::new(ASTNode::SQLIdentifier("segment".to_string())), + list: vec![ + ASTNode::SQLValue(Value::SingleQuotedString("HIGH".to_string())), + ASTNode::SQLValue(Value::SingleQuotedString("MED".to_string())), + ], + negated, + }, + select.selection.unwrap() + ); + } + chk(false); + chk(true); +} + +#[test] +fn parse_in_subquery() { + let sql = "SELECT * FROM customers WHERE segment IN (SELECT segm FROM bar)"; + let select = verified_only_select(sql); + assert_eq!( + ASTNode::SQLInSubquery { + expr: Box::new(ASTNode::SQLIdentifier("segment".to_string())), + subquery: Box::new(verified_query("SELECT segm FROM bar")), + negated: false, + }, + select.selection.unwrap() + ); +} + #[test] fn parse_select_order_by() { fn chk(sql: &str) { From 786b1cf18abad4bfbee5966bc31e0225eeb0fca0 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Mon, 11 Feb 2019 02:18:09 +0300 Subject: [PATCH 37/45] Support BETWEEN --- src/sqlast/mod.rs | 19 +++++++++++++++++++ src/sqlparser.rs | 17 +++++++++++++++++ tests/sqlparser_generic.rs | 22 ++++++++++++++++++++++ 3 files changed, 58 insertions(+) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 64dbe98f8..85dc9f92b 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -64,6 +64,13 @@ pub enum ASTNode { subquery: Box, negated: bool, }, + /// [ NOT ] BETWEEN AND + SQLBetween { + expr: Box, + negated: bool, + low: Box, + high: Box, + }, /// Binary expression e.g. 
`1 + 1` or `foo > bar` SQLBinaryExpr { left: Box, @@ -131,6 +138,18 @@ impl ToString for ASTNode { if *negated { "NOT " } else { "" }, subquery.to_string() ), + ASTNode::SQLBetween { + expr, + negated, + low, + high, + } => format!( + "{} {}BETWEEN {} AND {}", + expr.to_string(), + if *negated { "NOT " } else { "" }, + low.to_string(), + high.to_string() + ), ASTNode::SQLBinaryExpr { left, op, right } => format!( "{} {} {}", left.as_ref().to_string(), diff --git a/src/sqlparser.rs b/src/sqlparser.rs index e4d9057f2..8ee45a427 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -309,6 +309,8 @@ impl Parser { Token::SQLWord(ref k) if k.keyword == "NOT" => { if self.parse_keyword("IN") { self.parse_in(expr, true) + } else if self.parse_keyword("BETWEEN") { + self.parse_between(expr, true) } else if self.parse_keyword("LIKE") { Ok(ASTNode::SQLBinaryExpr { left: Box::new(expr), @@ -323,6 +325,7 @@ impl Parser { } } Token::SQLWord(ref k) if k.keyword == "IN" => self.parse_in(expr, false), + Token::SQLWord(ref k) if k.keyword == "BETWEEN" => self.parse_between(expr, false), Token::DoubleColon => self.parse_pg_cast(expr), Token::SQLWord(_) | Token::Eq @@ -369,6 +372,19 @@ impl Parser { Ok(in_op) } + /// Parses `BETWEEN AND `, assuming the `BETWEEN` keyword was already consumed + pub fn parse_between(&mut self, expr: ASTNode, negated: bool) -> Result { + let low = self.parse_prefix()?; + self.expect_keyword("AND")?; + let high = self.parse_prefix()?; + Ok(ASTNode::SQLBetween { + expr: Box::new(expr), + negated, + low: Box::new(low), + high: Box::new(high), + }) + } + /// Parse a postgresql casting style which is in the form of `expr::datatype` pub fn parse_pg_cast(&mut self, expr: ASTNode) -> Result { Ok(ASTNode::SQLCast { @@ -418,6 +434,7 @@ impl Parser { &Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(15), &Token::SQLWord(ref k) if k.keyword == "IS" => Ok(17), &Token::SQLWord(ref k) if k.keyword == "IN" => Ok(20), + &Token::SQLWord(ref k) if k.keyword == "BETWEEN" 
=> Ok(20), &Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(20), &Token::Eq | &Token::Lt | &Token::LtEq | &Token::Neq | &Token::Gt | &Token::GtEq => { Ok(20) diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 81150f642..5573a9c91 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -288,6 +288,28 @@ fn parse_in_subquery() { ); } +#[test] +fn parse_between() { + fn chk(negated: bool) { + let sql = &format!( + "SELECT * FROM customers WHERE age {}BETWEEN 25 AND 32", + if negated { "NOT " } else { "" } + ); + let select = verified_only_select(sql); + assert_eq!( + ASTNode::SQLBetween { + expr: Box::new(ASTNode::SQLIdentifier("age".to_string())), + low: Box::new(ASTNode::SQLValue(Value::Long(25))), + high: Box::new(ASTNode::SQLValue(Value::Long(32))), + negated, + }, + select.selection.unwrap() + ); + } + chk(false); + chk(true); +} + #[test] fn parse_select_order_by() { fn chk(sql: &str) { From 54c9ca86193d25fa29a2cd774a274bc9755dfabc Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Mon, 11 Feb 2019 02:30:07 +0300 Subject: [PATCH 38/45] Support unary + / - --- src/sqlparser.rs | 10 +++++++++- tests/sqlparser_generic.rs | 21 +++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 8ee45a427..ed0fd82b6 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -174,6 +174,7 @@ impl Parser { expr: Box::new(self.parse_subexpr(p)?), }) } + // another SQLWord: _ => match self.peek_token() { Some(Token::LParen) => self.parse_function(w.as_sql_ident()), Some(Token::Period) => { @@ -201,8 +202,15 @@ impl Parser { } _ => Ok(ASTNode::SQLIdentifier(w.as_sql_ident())), }, - }, + }, // End of Token::SQLWord Token::Mult => Ok(ASTNode::SQLWildcard), + tok @ Token::Minus | tok @ Token::Plus => { + let p = self.get_precedence(&tok)?; + Ok(ASTNode::SQLUnary { + operator: self.to_sql_operator(&tok)?, + expr: Box::new(self.parse_subexpr(p)?), + }) + } Token::Number(_) 
| Token::SingleQuotedString(_) | Token::NationalStringLiteral(_) => { diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 5573a9c91..caeeb4fb3 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -174,6 +174,27 @@ fn parse_compound_expr_2() { ); } +#[test] +fn parse_unary_math() { + use self::ASTNode::*; + use self::SQLOperator::*; + let sql = "- a + - b"; + assert_eq!( + SQLBinaryExpr { + left: Box::new(SQLUnary { + operator: Minus, + expr: Box::new(SQLIdentifier("a".to_string())), + }), + op: Plus, + right: Box::new(SQLUnary { + operator: Minus, + expr: Box::new(SQLIdentifier("b".to_string())), + }), + }, + verified_expr(sql) + ); +} + #[test] fn parse_is_null() { use self::ASTNode::*; From 23a0d032bde11538858b9ded98e3062048e91e5c Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Mon, 11 Feb 2019 02:48:36 +0300 Subject: [PATCH 39/45] Support NUMERIC without precision or scale https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#exact-numeric-type --- src/sqlast/sqltype.rs | 12 ++++++++---- src/sqlparser.rs | 6 +++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/sqlast/sqltype.rs b/src/sqlast/sqltype.rs index eeb7ffe5d..06a092033 100644 --- a/src/sqlast/sqltype.rs +++ b/src/sqlast/sqltype.rs @@ -17,8 +17,8 @@ pub enum SQLType { Varbinary(usize), /// Large binary object e.g. BLOB(1000) Blob(usize), - /// Decimal type with precision and optional scale e.g. DECIMAL(10,2) - Decimal(usize, Option), + /// Decimal type with optional precision and scale e.g. 
DECIMAL(10,2) + Decimal(Option, Option), /// Small integer SmallInt, /// Integer @@ -75,9 +75,13 @@ impl ToString for SQLType { SQLType::Blob(size) => format!("blob({})", size), SQLType::Decimal(precision, scale) => { if let Some(scale) = scale { - format!("numeric({},{})", precision, scale) + format!("numeric({},{})", precision.unwrap(), scale) } else { - format!("numeric({})", precision) + if let Some(precision) = precision { + format!("numeric({})", precision) + } else { + format!("numeric") + } } } SQLType::SmallInt => "smallint".to_string(), diff --git a/src/sqlparser.rs b/src/sqlparser.rs index ed0fd82b6..f2b9cc31a 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -1166,7 +1166,7 @@ impl Parser { pub fn parse_optional_precision_scale( &mut self, - ) -> Result<(usize, Option), ParserError> { + ) -> Result<(Option, Option), ParserError> { if self.consume_token(&Token::LParen) { let n = self.parse_literal_int()?; let scale = if self.consume_token(&Token::Comma) { @@ -1175,9 +1175,9 @@ impl Parser { None }; self.expect_token(&Token::RParen)?; - Ok((n as usize, scale)) + Ok((Some(n as usize), scale)) } else { - parser_err!("Expecting `(`") + Ok((None, None)) } } From 533775c0dad14e7065a5f33b97b980df5f81a8b4 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Mon, 11 Feb 2019 04:54:17 +0300 Subject: [PATCH 40/45] Support CHAR synonym for CHARACTER https://jakewheat.github.io/sql-overview/sql-2011-foundation-grammar.html#character-string-type --- src/sqlparser.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sqlparser.rs b/src/sqlparser.rs index f2b9cc31a..7fb45c219 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -984,7 +984,7 @@ impl Parser { "INT" | "INTEGER" => Ok(SQLType::Int), "BIGINT" => Ok(SQLType::BigInt), "VARCHAR" => Ok(SQLType::Varchar(self.parse_optional_precision()?)), - "CHARACTER" => { + "CHAR" | "CHARACTER" => { if self.parse_keyword("VARYING") { Ok(SQLType::Varchar(self.parse_optional_precision()?)) } else { 
From 52e0f55b6f4d3330008ad1304d0270ba0f0bb6f6 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Thu, 7 Feb 2019 14:43:37 +0300 Subject: [PATCH 41/45] Support UNION/EXCEPT/INTERSECT --- src/dialect/keywords.rs | 6 ++-- src/sqlast/mod.rs | 2 +- src/sqlast/query.rs | 62 +++++++++++++++++++++++++++++++++++++- src/sqlparser.rs | 62 ++++++++++++++++++++++++++++++++++++-- tests/sqlparser_ansi.rs | 2 +- tests/sqlparser_generic.rs | 23 +++++++++++++- 6 files changed, 147 insertions(+), 10 deletions(-) diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index b3dd1e27c..d51e0cf2e 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -716,8 +716,7 @@ pub const ALL_KEYWORDS: &'static [&'static str] = &[ /// can be parsed unambiguously without looking ahead. pub const RESERVED_FOR_TABLE_ALIAS: &'static [&'static str] = &[ // Reserved as both a table and a column alias: - WITH, SELECT, WHERE, GROUP, ORDER, - // TODO add these with tests: UNION, EXCEPT, INTERSECT, + WITH, SELECT, WHERE, GROUP, ORDER, UNION, EXCEPT, INTERSECT, // Reserved only as a table alias in the `FROM`/`JOIN` clauses: ON, JOIN, INNER, CROSS, FULL, LEFT, RIGHT, NATURAL, USING, ]; @@ -726,8 +725,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &'static [&'static str] = &[ /// can be parsed unambiguously without looking ahead. 
pub const RESERVED_FOR_COLUMN_ALIAS: &'static [&'static str] = &[ // Reserved as both a table and a column alias: - WITH, SELECT, WHERE, GROUP, ORDER, - // TODO add these with tests: UNION, EXCEPT, INTERSECT, + WITH, SELECT, WHERE, GROUP, ORDER, UNION, EXCEPT, INTERSECT, // Reserved only as a column alias in the `SELECT` clause: FROM, ]; diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 85dc9f92b..0981eb898 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -22,7 +22,7 @@ mod value; pub use self::query::{ Cte, Join, JoinConstraint, JoinOperator, SQLOrderByExpr, SQLQuery, SQLSelect, SQLSelectItem, - TableFactor, + SQLSetExpr, SQLSetOperator, TableFactor, }; pub use self::sqltype::SQLType; pub use self::table_key::{AlterOperation, Key, TableKey}; diff --git a/src/sqlast/query.rs b/src/sqlast/query.rs index 9ccf56028..69577e557 100644 --- a/src/sqlast/query.rs +++ b/src/sqlast/query.rs @@ -7,7 +7,7 @@ pub struct SQLQuery { /// WITH (common table expressions, or CTEs) pub ctes: Vec, /// SELECT or UNION / EXCEPT / INTECEPT - pub body: SQLSelect, + pub body: SQLSetExpr, /// ORDER BY pub order_by: Option>, /// LIMIT @@ -45,6 +45,66 @@ impl ToString for SQLQuery { } } +/// A node in a tree, representing a "query body" expression, roughly: +/// `SELECT ... [ {UNION|EXCEPT|INTERSECT} SELECT ...]` +#[derive(Debug, Clone, PartialEq)] +pub enum SQLSetExpr { + /// Restricted SELECT .. FROM .. HAVING (no ORDER BY or set operations) + Select(SQLSelect), + /// Parenthesized SELECT subquery, which may include more set operations + /// in its body and an optional ORDER BY / LIMIT. + Query(Box), + /// UNION/EXCEPT/INTERSECT of two queries + SetOperation { + op: SQLSetOperator, + all: bool, + left: Box, + right: Box, + }, + // TODO: ANSI SQL supports `TABLE` and `VALUES` here. 
+} + +impl ToString for SQLSetExpr { + fn to_string(&self) -> String { + match self { + SQLSetExpr::Select(s) => s.to_string(), + SQLSetExpr::Query(q) => format!("({})", q.to_string()), + SQLSetExpr::SetOperation { + left, + right, + op, + all, + } => { + let all_str = if *all { " ALL" } else { "" }; + format!( + "{} {}{} {}", + left.to_string(), + op.to_string(), + all_str, + right.to_string() + ) + } + } + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum SQLSetOperator { + Union, + Except, + Intersect, +} + +impl ToString for SQLSetOperator { + fn to_string(&self) -> String { + match self { + SQLSetOperator::Union => "UNION".to_string(), + SQLSetOperator::Except => "EXCEPT".to_string(), + SQLSetOperator::Intersect => "INTERSECT".to_string(), + } + } +} + /// A restricted variant of `SELECT` (without CTEs/`ORDER BY`), which may /// appear either as the only body item of an `SQLQuery`, or as an operand /// to a set operation like `UNION`. diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 7fb45c219..263e6e991 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -60,6 +60,7 @@ impl Parser { let mut parser = Parser::new(tokens); let mut stmts = Vec::new(); let mut expecting_statement_delimiter = false; + debug!("Parsing sql '{}'...", sql); loop { // ignore empty statements (between successive statement delimiters) while parser.consume_token(&Token::SemiColon) { @@ -1208,8 +1209,7 @@ impl Parser { vec![] }; - self.expect_keyword("SELECT")?; - let body = self.parse_select()?; + let body = self.parse_query_body(0)?; let order_by = if self.parse_keywords(vec!["ORDER", "BY"]) { Some(self.parse_order_by_expr_list()?) 
@@ -1252,6 +1252,64 @@ impl Parser { return Ok(cte); } + /// Parse a "query body", which is an expression with roughly the + /// following grammar: + /// ```text + /// query_body ::= restricted_select | '(' subquery ')' | set_operation + /// restricted_select ::= 'SELECT' [expr_list] [ from ] [ where ] [ groupby_having ] + /// subquery ::= query_body [ order_by_limit ] + /// set_operation ::= query_body { 'UNION' | 'EXCEPT' | 'INTERSECT' } [ 'ALL' ] query_body + /// ``` + fn parse_query_body(&mut self, precedence: u8) -> Result { + // We parse the expression using a Pratt parser, as in `parse_expr()`. + // Start by parsing a restricted SELECT or a `(subquery)`: + let mut expr = if self.parse_keyword("SELECT") { + SQLSetExpr::Select(self.parse_select()?) + } else if self.consume_token(&Token::LParen) { + // CTEs are not allowed here, but the parser currently accepts them + let subquery = self.parse_query()?; + self.expect_token(&Token::RParen)?; + SQLSetExpr::Query(Box::new(subquery)) + } else { + parser_err!("Expected SELECT or a subquery in the query body!")? 
+ }; + + loop { + // The query can be optionally followed by a set operator: + let next_token = self.peek_token(); + let op = self.parse_set_operator(&next_token); + let next_precedence = match op { + // UNION and EXCEPT have the same binding power and evaluate left-to-right + Some(SQLSetOperator::Union) | Some(SQLSetOperator::Except) => 10, + // INTERSECT has higher precedence than UNION/EXCEPT + Some(SQLSetOperator::Intersect) => 20, + // Unexpected token or EOF => stop parsing the query body + None => break, + }; + if precedence >= next_precedence { + break; + } + self.next_token(); // skip past the set operator + expr = SQLSetExpr::SetOperation { + left: Box::new(expr), + op: op.unwrap(), + all: self.parse_keyword("ALL"), + right: Box::new(self.parse_query_body(next_precedence)?), + }; + } + + Ok(expr) + } + + fn parse_set_operator(&mut self, token: &Option) -> Option { + match token { + Some(Token::SQLWord(w)) if w.keyword == "UNION" => Some(SQLSetOperator::Union), + Some(Token::SQLWord(w)) if w.keyword == "EXCEPT" => Some(SQLSetOperator::Except), + Some(Token::SQLWord(w)) if w.keyword == "INTERSECT" => Some(SQLSetOperator::Intersect), + _ => None, + } + } + /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`), /// assuming the initial `SELECT` was already consumed pub fn parse_select(&mut self) -> Result { diff --git a/tests/sqlparser_ansi.rs b/tests/sqlparser_ansi.rs index 871046b1a..73054fb78 100644 --- a/tests/sqlparser_ansi.rs +++ b/tests/sqlparser_ansi.rs @@ -12,7 +12,7 @@ fn parse_simple_select() { assert_eq!(1, ast.len()); match ast.first().unwrap() { SQLStatement::SQLSelect(SQLQuery { - body: SQLSelect { projection, .. }, + body: SQLSetExpr::Select(SQLSelect { projection, .. }), .. 
}) => { assert_eq!(3, projection.len()); diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index caeeb4fb3..e9be58ea4 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -825,6 +825,24 @@ fn parse_derived_tables() { //TODO: add assertions } +#[test] +fn parse_union() { + // TODO: add assertions + verified_stmt("SELECT 1 UNION SELECT 2"); + verified_stmt("SELECT 1 UNION ALL SELECT 2"); + verified_stmt("SELECT 1 EXCEPT SELECT 2"); + verified_stmt("SELECT 1 EXCEPT ALL SELECT 2"); + verified_stmt("SELECT 1 INTERSECT SELECT 2"); + verified_stmt("SELECT 1 INTERSECT ALL SELECT 2"); + verified_stmt("SELECT 1 UNION SELECT 2 UNION SELECT 3"); + verified_stmt("SELECT 1 EXCEPT SELECT 2 UNION SELECT 3"); // Union[Except[1,2], 3] + verified_stmt("SELECT 1 INTERSECT (SELECT 2 EXCEPT SELECT 3)"); + verified_stmt("WITH cte AS (SELECT 1 AS foo) (SELECT foo FROM cte ORDER BY 1 LIMIT 1)"); + verified_stmt("SELECT 1 UNION (SELECT 2 ORDER BY 1 LIMIT 1)"); + verified_stmt("SELECT 1 UNION SELECT 2 INTERSECT SELECT 3"); // Union[1, Intersect[2,3]] + verified_stmt("SELECT foo FROM tab UNION SELECT bar FROM TAB"); +} + #[test] fn parse_multiple_statements() { fn test_with(sql1: &str, sql2_kw: &str, sql2_rest: &str) { @@ -920,7 +938,10 @@ fn expr_from_projection(item: &SQLSelectItem) -> &ASTNode { } fn verified_only_select(query: &str) -> SQLSelect { - verified_query(query).body + match verified_query(query).body { + SQLSetExpr::Select(s) => s, + _ => panic!("Expected SQLSetExpr::Select"), + } } fn verified_stmt(query: &str) -> SQLStatement { From 23a0fc79f5c5b9f23e25bd18ea57ab8e400f07a1 Mon Sep 17 00:00:00 2001 From: Nikhil Benesch Date: Wed, 6 Mar 2019 17:19:40 -0500 Subject: [PATCH 42/45] Support CREATE MATERIALIZED VIEW --- src/dialect/keywords.rs | 2 ++ src/sqlast/mod.rs | 6 ++++-- src/sqlparser.rs | 7 +++++-- tests/sqlparser_generic.rs | 16 +++++++++++++++- 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/dialect/keywords.rs 
b/src/dialect/keywords.rs index d51e0cf2e..e270e8a9f 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -191,6 +191,7 @@ keyword!( LOCATION, LOWER, MATCH, + MATERIALIZED, MAX, MEMBER, MERGE, @@ -539,6 +540,7 @@ pub const ALL_KEYWORDS: &'static [&'static str] = &[ LOCATION, LOWER, MATCH, + MATERIALIZED, MAX, MEMBER, MERGE, diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 0981eb898..a37a171c9 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -241,6 +241,7 @@ pub enum SQLStatement { /// View name name: SQLObjectName, query: SQLQuery, + materialized: bool, }, /// CREATE TABLE SQLCreateTable { @@ -347,8 +348,9 @@ impl ToString for SQLStatement { } s } - SQLStatement::SQLCreateView { name, query } => { - format!("CREATE VIEW {} AS {}", name.to_string(), query.to_string()) + SQLStatement::SQLCreateView { name, query, materialized } => { + let modifier = if *materialized { " MATERIALIZED" } else { "" }; + format!("CREATE{} VIEW {} AS {}", modifier, name.to_string(), query.to_string()) } SQLStatement::SQLCreateTable { name, columns } => format!( "CREATE TABLE {} ({})", diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 263e6e991..6b199724b 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -617,7 +617,8 @@ impl Parser { pub fn parse_create(&mut self) -> Result { if self.parse_keyword("TABLE") { self.parse_create_table() - } else if self.parse_keyword("VIEW") { + } else if self.parse_keyword("MATERIALIZED") || self.parse_keyword("VIEW") { + self.prev_token(); self.parse_create_view() } else { parser_err!(format!( @@ -628,6 +629,8 @@ impl Parser { } pub fn parse_create_view(&mut self) -> Result { + let materialized = self.parse_keyword("MATERIALIZED"); + self.expect_keyword("VIEW")?; // Many dialects support `OR REPLACE` | `OR ALTER` right after `CREATE`, but we don't (yet). // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. 
let name = self.parse_object_name()?; @@ -637,7 +640,7 @@ impl Parser { self.expect_keyword("AS")?; let query = self.parse_query()?; // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. - Ok(SQLStatement::SQLCreateView { name, query }) + Ok(SQLStatement::SQLCreateView { name, query, materialized }) } pub fn parse_create_table(&mut self) -> Result { diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index e9be58ea4..bbfeb4475 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -901,9 +901,23 @@ fn parse_scalar_subqueries() { fn parse_create_view() { let sql = "CREATE VIEW myschema.myview AS SELECT foo FROM bar"; match verified_stmt(sql) { - SQLStatement::SQLCreateView { name, query } => { + SQLStatement::SQLCreateView { name, query, materialized } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!("SELECT foo FROM bar", query.to_string()); + assert!(!materialized); + } + _ => assert!(false), + } +} + +#[test] +fn parse_create_materialized_view() { + let sql = "CREATE MATERIALIZED VIEW myschema.myview AS SELECT foo FROM bar"; + match verified_stmt(sql) { + SQLStatement::SQLCreateView { name, query, materialized } => { + assert_eq!("myschema.myview", name.to_string()); + assert_eq!("SELECT foo FROM bar", query.to_string()); + assert!(materialized); } _ => assert!(false), } From f30ab89ad289f50084f68824e3aaa8abb945e5b4 Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Fri, 8 Mar 2019 15:46:40 +0300 Subject: [PATCH 43/45] Re-run cargo fmt --- src/sqlast/mod.rs | 13 +++++++++++-- src/sqlparser.rs | 6 +++++- tests/sqlparser_generic.rs | 12 ++++++++++-- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index a37a171c9..fe30586b4 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -348,9 +348,18 @@ impl ToString for SQLStatement { } s } - SQLStatement::SQLCreateView { name, query, materialized } => { + SQLStatement::SQLCreateView { 
+ name, + query, + materialized, + } => { let modifier = if *materialized { " MATERIALIZED" } else { "" }; - format!("CREATE{} VIEW {} AS {}", modifier, name.to_string(), query.to_string()) + format!( + "CREATE{} VIEW {} AS {}", + modifier, + name.to_string(), + query.to_string() + ) } SQLStatement::SQLCreateTable { name, columns } => format!( "CREATE TABLE {} ({})", diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 6b199724b..22d0e70ce 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -640,7 +640,11 @@ impl Parser { self.expect_keyword("AS")?; let query = self.parse_query()?; // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. - Ok(SQLStatement::SQLCreateView { name, query, materialized }) + Ok(SQLStatement::SQLCreateView { + name, + query, + materialized, + }) } pub fn parse_create_table(&mut self) -> Result { diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index bbfeb4475..b3e418a06 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -901,7 +901,11 @@ fn parse_scalar_subqueries() { fn parse_create_view() { let sql = "CREATE VIEW myschema.myview AS SELECT foo FROM bar"; match verified_stmt(sql) { - SQLStatement::SQLCreateView { name, query, materialized } => { + SQLStatement::SQLCreateView { + name, + query, + materialized, + } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!("SELECT foo FROM bar", query.to_string()); assert!(!materialized); @@ -914,7 +918,11 @@ fn parse_create_view() { fn parse_create_materialized_view() { let sql = "CREATE MATERIALIZED VIEW myschema.myview AS SELECT foo FROM bar"; match verified_stmt(sql) { - SQLStatement::SQLCreateView { name, query, materialized } => { + SQLStatement::SQLCreateView { + name, + query, + materialized, + } => { assert_eq!("myschema.myview", name.to_string()); assert_eq!("SELECT foo FROM bar", query.to_string()); assert!(materialized); From 6dfe7c2413f7472066a15750426de4608b535060 Mon Sep 17 00:00:00 2001 From: 
Nickolay Ponomarev Date: Tue, 2 Apr 2019 09:15:15 +0300 Subject: [PATCH 44/45] reapply the "Rework keyword/identifier parsing" PR --- src/dialect/ansi_sql.rs | 331 ---------------------------- src/dialect/generic_sql.rs | 14 -- src/dialect/keywords.rs | 375 +++++++++++++++++++++++++++++++- src/dialect/mod.rs | 2 - src/dialect/postgresql.rs | 16 -- src/sqlast/mod.rs | 41 ++-- src/sqlast/table_key.rs | 8 +- src/sqlparser.rs | 423 ++++++++++++++++++------------------ src/sqltokenizer.rs | 127 +++++++---- tests/sqlparser_generic.rs | 115 +++++++--- tests/sqlparser_postgres.rs | 26 +-- 11 files changed, 787 insertions(+), 691 deletions(-) diff --git a/src/dialect/ansi_sql.rs b/src/dialect/ansi_sql.rs index b91fdc6e9..4026cf61c 100644 --- a/src/dialect/ansi_sql.rs +++ b/src/dialect/ansi_sql.rs @@ -1,339 +1,8 @@ use dialect::Dialect; -use dialect::keywords::*; - pub struct AnsiSqlDialect {} impl Dialect for AnsiSqlDialect { - fn keywords(&self) -> Vec<&'static str> { - return vec![ - ABS, - ALL, - ALLOCATE, - ALTER, - AND, - ANY, - ARE, - ARRAY, - ARRAY_AGG, - ARRAY_MAX_CARDINALITY, - AS, - ASENSITIVE, - ASYMMETRIC, - AT, - ATOMIC, - AUTHORIZATION, - AVG, - BEGIN, - BEGIN_FRAME, - BEGIN_PARTITION, - BETWEEN, - BIGINT, - BINARY, - BLOB, - BOOLEAN, - BOTH, - BY, - CALL, - CALLED, - CARDINALITY, - CASCADED, - CASE, - CAST, - CEIL, - CEILING, - CHAR, - CHAR_LENGTH, - CHARACTER, - CHARACTER_LENGTH, - CHECK, - CLOB, - CLOSE, - COALESCE, - COLLATE, - COLLECT, - COLUMN, - COMMIT, - CONDITION, - CONNECT, - CONSTRAINT, - CONTAINS, - CONVERT, - CORR, - CORRESPONDING, - COUNT, - COVAR_POP, - COVAR_SAMP, - CREATE, - CROSS, - CUBE, - CUME_DIST, - CURRENT, - CURRENT_CATALOG, - CURRENT_DATE, - CURRENT_DEFAULT_TRANSFORM_GROUP, - CURRENT_PATH, - CURRENT_ROLE, - CURRENT_ROW, - CURRENT_SCHEMA, - CURRENT_TIME, - CURRENT_TIMESTAMP, - CURRENT_TRANSFORM_GROUP_FOR_TYPE, - CURRENT_USER, - CURSOR, - CYCLE, - DATE, - DAY, - DEALLOCATE, - DEC, - DECIMAL, - DECLARE, - DEFAULT, - DELETE, - DENSE_RANK, 
- DEREF, - DESCRIBE, - DETERMINISTIC, - DISCONNECT, - DISTINCT, - DOUBLE, - DROP, - DYNAMIC, - EACH, - ELEMENT, - ELSE, - END, - END_FRAME, - END_PARTITION, - END_EXEC, - EQUALS, - ESCAPE, - EVERY, - EXCEPT, - EXEC, - EXECUTE, - EXISTS, - EXP, - EXTERNAL, - EXTRACT, - FALSE, - FETCH, - FILTER, - FIRST_VALUE, - FLOAT, - FLOOR, - FOR, - FOREIGN, - FRAME_ROW, - FREE, - FROM, - FULL, - FUNCTION, - FUSION, - GET, - GLOBAL, - GRANT, - GROUP, - GROUPING, - GROUPS, - HAVING, - HOLD, - HOUR, - IDENTITY, - IN, - INDICATOR, - INNER, - INOUT, - INSENSITIVE, - INSERT, - INT, - INTEGER, - INTERSECT, - INTERSECTION, - INTERVAL, - INTO, - IS, - JOIN, - LAG, - LANGUAGE, - LARGE, - LAST_VALUE, - LATERAL, - LEAD, - LEADING, - LEFT, - LIKE, - LIKE_REGEX, - LN, - LOCAL, - LOCALTIME, - LOCALTIMESTAMP, - LOWER, - MATCH, - MAX, - MEMBER, - MERGE, - METHOD, - MIN, - MINUTE, - MOD, - MODIFIES, - MODULE, - MONTH, - MULTISET, - NATIONAL, - NATURAL, - NCHAR, - NCLOB, - NEW, - NO, - NONE, - NORMALIZE, - NOT, - NTH_VALUE, - NTILE, - NULL, - NULLIF, - NUMERIC, - OCTET_LENGTH, - OCCURRENCES_REGEX, - OF, - OFFSET, - OLD, - ON, - ONLY, - OPEN, - OR, - ORDER, - OUT, - OUTER, - OVER, - OVERLAPS, - OVERLAY, - PARAMETER, - PARTITION, - PERCENT, - PERCENT_RANK, - PERCENTILE_CONT, - PERCENTILE_DISC, - PERIOD, - PORTION, - POSITION, - POSITION_REGEX, - POWER, - PRECEDES, - PRECISION, - PREPARE, - PRIMARY, - PROCEDURE, - RANGE, - RANK, - READS, - REAL, - RECURSIVE, - REF, - REFERENCES, - REFERENCING, - REGR_AVGX, - REGR_AVGY, - REGR_COUNT, - REGR_INTERCEPT, - REGR_R2, - REGR_SLOPE, - REGR_SXX, - REGR_SXY, - REGR_SYY, - RELEASE, - RESULT, - RETURN, - RETURNS, - REVOKE, - RIGHT, - ROLLBACK, - ROLLUP, - ROW, - ROW_NUMBER, - ROWS, - SAVEPOINT, - SCOPE, - SCROLL, - SEARCH, - SECOND, - SELECT, - SENSITIVE, - SESSION_USER, - SET, - SIMILAR, - SMALLINT, - SOME, - SPECIFIC, - SPECIFICTYPE, - SQL, - SQLEXCEPTION, - SQLSTATE, - SQLWARNING, - SQRT, - START, - STATIC, - STDDEV_POP, - STDDEV_SAMP, - SUBMULTISET, - 
SUBSTRING, - SUBSTRING_REGEX, - SUCCEEDS, - SUM, - SYMMETRIC, - SYSTEM, - SYSTEM_TIME, - SYSTEM_USER, - TABLE, - TABLESAMPLE, - THEN, - TIME, - TIMESTAMP, - TIMEZONE_HOUR, - TIMEZONE_MINUTE, - TO, - TRAILING, - TRANSLATE, - TRANSLATE_REGEX, - TRANSLATION, - TREAT, - TRIGGER, - TRUNCATE, - TRIM, - TRIM_ARRAY, - TRUE, - UESCAPE, - UNION, - UNIQUE, - UNKNOWN, - UNNEST, - UPDATE, - UPPER, - USER, - USING, - VALUE, - VALUES, - VALUE_OF, - VAR_POP, - VAR_SAMP, - VARBINARY, - VARCHAR, - VARYING, - VERSIONING, - WHEN, - WHENEVER, - WHERE, - WIDTH_BUCKET, - WINDOW, - WITH, - WITHIN, - WITHOUT, - YEAR, - ]; - } - fn is_identifier_start(&self, ch: char) -> bool { (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') } diff --git a/src/dialect/generic_sql.rs b/src/dialect/generic_sql.rs index 0f18b7234..54275d69f 100644 --- a/src/dialect/generic_sql.rs +++ b/src/dialect/generic_sql.rs @@ -1,21 +1,7 @@ use dialect::Dialect; - -use dialect::keywords::*; pub struct GenericSqlDialect {} impl Dialect for GenericSqlDialect { - fn keywords(&self) -> Vec<&'static str> { - return vec![ - SELECT, FROM, WHERE, LIMIT, ORDER, GROUP, BY, HAVING, UNION, ALL, INSERT, INTO, UPDATE, - DELETE, IN, IS, NULL, SET, CREATE, EXTERNAL, TABLE, ASC, DESC, AND, OR, NOT, AS, - STORED, CSV, PARQUET, LOCATION, WITH, WITHOUT, HEADER, ROW, // SQL types - CHAR, CHARACTER, VARYING, LARGE, OBJECT, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT, - REAL, DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC, - BOOLEAN, DATE, TIME, TIMESTAMP, CASE, WHEN, THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, - CROSS, OUTER, INNER, NATURAL, ON, USING, LIKE, - ]; - } - fn is_identifier_start(&self, ch: char) -> bool { (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '@' } diff --git a/src/dialect/keywords.rs b/src/dialect/keywords.rs index e46837243..1a39fe448 100644 --- a/src/dialect/keywords.rs +++ b/src/dialect/keywords.rs @@ -1,12 +1,23 @@ -/// make a listing of keywords -/// with static str and 
their stringified value +///! This module defines +/// 1) a list of constants for every keyword that +/// can appear in SQLWord::keyword: +/// pub const KEYWORD = "KEYWORD" +/// 2) an `ALL_KEYWORDS` array with every keyword in it +/// This is not a list of *reserved* keywords: some of these can be +/// parsed as identifiers if the parser decides so. This means that +/// new keywords can be added here without affecting the parse result. +/// +/// As a matter of fact, most of these keywords are not used at all +/// and could be removed. +/// 3) a `RESERVED_FOR_TABLE_ALIAS` array with keywords reserved in a +/// "table alias" context. + macro_rules! keyword { ($($ident:ident),*) => { - $(pub static $ident: &'static str = stringify!($ident);)* + $(pub const $ident: &'static str = stringify!($ident);)* } } -/// enumerate all the keywords here for all dialects to support in this project keyword!( ABS, ADD, @@ -352,4 +363,358 @@ keyword!( ); /// special case of keyword where the it is an invalid identifier -pub static END_EXEC: &'static str = "END-EXEC"; +pub const END_EXEC: &'static str = "END-EXEC"; + +pub const ALL_KEYWORDS: &'static [&'static str] = &[ + ABS, + ADD, + ASC, + ALL, + ALLOCATE, + ALTER, + AND, + ANY, + ARE, + ARRAY, + ARRAY_AGG, + ARRAY_MAX_CARDINALITY, + AS, + ASENSITIVE, + ASYMMETRIC, + AT, + ATOMIC, + AUTHORIZATION, + AVG, + BEGIN, + BEGIN_FRAME, + BEGIN_PARTITION, + BETWEEN, + BIGINT, + BINARY, + BLOB, + BOOLEAN, + BOTH, + BY, + BYTEA, + CALL, + CALLED, + CARDINALITY, + CASCADED, + CASE, + CAST, + CEIL, + CEILING, + CHAR, + CHAR_LENGTH, + CHARACTER, + CHARACTER_LENGTH, + CHECK, + CLOB, + CLOSE, + COALESCE, + COLLATE, + COLLECT, + COLUMN, + COMMIT, + CONDITION, + CONNECT, + CONSTRAINT, + CONTAINS, + CONVERT, + COPY, + CORR, + CORRESPONDING, + COUNT, + COVAR_POP, + COVAR_SAMP, + CREATE, + CROSS, + CSV, + CUBE, + CUME_DIST, + CURRENT, + CURRENT_CATALOG, + CURRENT_DATE, + CURRENT_DEFAULT_TRANSFORM_GROUP, + CURRENT_PATH, + CURRENT_ROLE, + CURRENT_ROW, + 
CURRENT_SCHEMA, + CURRENT_TIME, + CURRENT_TIMESTAMP, + CURRENT_TRANSFORM_GROUP_FOR_TYPE, + CURRENT_USER, + CURSOR, + CYCLE, + DATE, + DAY, + DEALLOCATE, + DEC, + DECIMAL, + DECLARE, + DEFAULT, + DELETE, + DENSE_RANK, + DEREF, + DESC, + DESCRIBE, + DETERMINISTIC, + DISCONNECT, + DISTINCT, + DOUBLE, + DROP, + DYNAMIC, + EACH, + ELEMENT, + ELSE, + END, + END_FRAME, + END_PARTITION, + EQUALS, + ESCAPE, + EVERY, + EXCEPT, + EXEC, + EXECUTE, + EXISTS, + EXP, + EXTERNAL, + EXTRACT, + FALSE, + FETCH, + FILTER, + FIRST_VALUE, + FLOAT, + FLOOR, + FOR, + FOREIGN, + FRAME_ROW, + FREE, + FROM, + FULL, + FUNCTION, + FUSION, + GET, + GLOBAL, + GRANT, + GROUP, + GROUPING, + GROUPS, + HAVING, + HEADER, + HOLD, + HOUR, + IDENTITY, + IN, + INDICATOR, + INNER, + INOUT, + INSENSITIVE, + INSERT, + INT, + INTEGER, + INTERSECT, + INTERSECTION, + INTERVAL, + INTO, + IS, + JOIN, + KEY, + LAG, + LANGUAGE, + LARGE, + LAST_VALUE, + LATERAL, + LEAD, + LEADING, + LEFT, + LIKE, + LIKE_REGEX, + LIMIT, + LN, + LOCAL, + LOCALTIME, + LOCALTIMESTAMP, + LOCATION, + LOWER, + MATCH, + MAX, + MEMBER, + MERGE, + METHOD, + MIN, + MINUTE, + MOD, + MODIFIES, + MODULE, + MONTH, + MULTISET, + NATIONAL, + NATURAL, + NCHAR, + NCLOB, + NEW, + NO, + NONE, + NORMALIZE, + NOT, + NTH_VALUE, + NTILE, + NULL, + NULLIF, + NUMERIC, + OBJECT, + OCTET_LENGTH, + OCCURRENCES_REGEX, + OF, + OFFSET, + OLD, + ON, + ONLY, + OPEN, + OR, + ORDER, + OUT, + OUTER, + OVER, + OVERLAPS, + OVERLAY, + PARAMETER, + PARTITION, + PARQUET, + PERCENT, + PERCENT_RANK, + PERCENTILE_CONT, + PERCENTILE_DISC, + PERIOD, + PORTION, + POSITION, + POSITION_REGEX, + POWER, + PRECEDES, + PRECISION, + PREPARE, + PRIMARY, + PROCEDURE, + RANGE, + RANK, + READS, + REAL, + RECURSIVE, + REF, + REFERENCES, + REFERENCING, + REGCLASS, + REGR_AVGX, + REGR_AVGY, + REGR_COUNT, + REGR_INTERCEPT, + REGR_R2, + REGR_SLOPE, + REGR_SXX, + REGR_SXY, + REGR_SYY, + RELEASE, + RESULT, + RETURN, + RETURNS, + REVOKE, + RIGHT, + ROLLBACK, + ROLLUP, + ROW, + ROW_NUMBER, + ROWS, + 
SAVEPOINT, + SCOPE, + SCROLL, + SEARCH, + SECOND, + SELECT, + SENSITIVE, + SESSION_USER, + SET, + SIMILAR, + SMALLINT, + SOME, + SPECIFIC, + SPECIFICTYPE, + SQL, + SQLEXCEPTION, + SQLSTATE, + SQLWARNING, + SQRT, + START, + STATIC, + STDDEV_POP, + STDDEV_SAMP, + STDIN, + STORED, + SUBMULTISET, + SUBSTRING, + SUBSTRING_REGEX, + SUCCEEDS, + SUM, + SYMMETRIC, + SYSTEM, + SYSTEM_TIME, + SYSTEM_USER, + TABLE, + TABLESAMPLE, + TEXT, + THEN, + TIME, + TIMESTAMP, + TIMEZONE_HOUR, + TIMEZONE_MINUTE, + TO, + TRAILING, + TRANSLATE, + TRANSLATE_REGEX, + TRANSLATION, + TREAT, + TRIGGER, + TRUNCATE, + TRIM, + TRIM_ARRAY, + TRUE, + UESCAPE, + UNION, + UNIQUE, + UNKNOWN, + UNNEST, + UPDATE, + UPPER, + USER, + USING, + UUID, + VALUE, + VALUES, + VALUE_OF, + VAR_POP, + VAR_SAMP, + VARBINARY, + VARCHAR, + VARYING, + VERSIONING, + WHEN, + WHENEVER, + WHERE, + WIDTH_BUCKET, + WINDOW, + WITH, + WITHIN, + WITHOUT, + YEAR, + ZONE, + END_EXEC, +]; + +/// These keywords can't be used as a table alias, so that `FROM table_name alias` +/// can be parsed unambiguously without looking ahead. +pub const RESERVED_FOR_TABLE_ALIAS: &'static [&'static str] = &[ + WHERE, GROUP, ON, // keyword is 'reserved' in most dialects + JOIN, INNER, CROSS, FULL, LEFT, RIGHT, // not reserved in Oracle + NATURAL, USING, // not reserved in Oracle & MSSQL + ORDER, // UNION, EXCEPT, INTERSECT, // TODO add these with tests. 
+]; diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 1a704f000..3298a1de3 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -8,8 +8,6 @@ pub use self::generic_sql::GenericSqlDialect; pub use self::postgresql::PostgreSqlDialect; pub trait Dialect { - /// Get a list of keywords for this dialect - fn keywords(&self) -> Vec<&'static str>; /// Determine if a character is a valid identifier start character fn is_identifier_start(&self, ch: char) -> bool; /// Determine if a character is a valid identifier character diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 66cb51c19..2b64c1f0a 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -1,24 +1,8 @@ use dialect::Dialect; -use dialect::keywords::*; - pub struct PostgreSqlDialect {} impl Dialect for PostgreSqlDialect { - fn keywords(&self) -> Vec<&'static str> { - return vec![ - ALTER, ONLY, SELECT, FROM, WHERE, LIMIT, ORDER, GROUP, BY, HAVING, UNION, ALL, INSERT, - INTO, UPDATE, DELETE, IN, IS, NULL, SET, CREATE, EXTERNAL, TABLE, ASC, DESC, AND, OR, - NOT, AS, STORED, CSV, WITH, WITHOUT, ROW, // SQL types - CHAR, CHARACTER, VARYING, LARGE, VARCHAR, CLOB, BINARY, VARBINARY, BLOB, FLOAT, REAL, - DOUBLE, PRECISION, INT, INTEGER, SMALLINT, BIGINT, NUMERIC, DECIMAL, DEC, BOOLEAN, - DATE, TIME, TIMESTAMP, VALUES, DEFAULT, ZONE, REGCLASS, TEXT, BYTEA, TRUE, FALSE, COPY, - STDIN, PRIMARY, KEY, UNIQUE, UUID, ADD, CONSTRAINT, FOREIGN, REFERENCES, CASE, WHEN, - THEN, ELSE, END, JOIN, LEFT, RIGHT, FULL, CROSS, OUTER, INNER, NATURAL, ON, USING, - LIKE, - ]; - } - fn is_identifier_start(&self, ch: char) -> bool { (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || ch == '@' } diff --git a/src/sqlast/mod.rs b/src/sqlast/mod.rs index 54b650a84..dbf4b9d48 100644 --- a/src/sqlast/mod.rs +++ b/src/sqlast/mod.rs @@ -25,15 +25,18 @@ pub use self::value::Value; pub use self::sql_operator::SQLOperator; +// This could be enhanced to remember the way the identifier was quoted 
+pub type SQLIdent = String; + /// SQL Abstract Syntax Tree (AST) #[derive(Debug, Clone, PartialEq)] pub enum ASTNode { /// Identifier e.g. table name or column name - SQLIdentifier(String), + SQLIdentifier(SQLIdent), /// Wildcard e.g. `*` SQLWildcard, /// Multi part identifier e.g. `myschema.dbo.mytable` - SQLCompoundIdentifier(Vec), + SQLCompoundIdentifier(Vec), /// Assigment e.g. `name = 'Fred'` in an UPDATE statement SQLAssignment(SQLAssignment), /// `IS NULL` expression @@ -69,12 +72,17 @@ pub enum ASTNode { results: Vec, else_result: Option>, }, + /// A table name or a parenthesized subquery with an optional alias + TableFactor { + relation: Box, // SQLNested or SQLCompoundIdentifier + alias: Option, + }, /// SELECT SQLSelect { /// projection expressions projection: Vec, /// FROM - relation: Option>, + relation: Option>, // TableFactor // JOIN joins: Vec, /// WHERE @@ -93,7 +101,7 @@ pub enum ASTNode { /// TABLE table_name: String, /// COLUMNS - columns: Vec, + columns: Vec, /// VALUES (vector of rows to insert) values: Vec>, }, @@ -101,7 +109,7 @@ pub enum ASTNode { /// TABLE table_name: String, /// COLUMNS - columns: Vec, + columns: Vec, /// VALUES a vector of values to be copied values: Vec>, }, @@ -188,6 +196,13 @@ impl ToString for ASTNode { } s + " END" } + ASTNode::TableFactor { relation, alias } => { + if let Some(alias) = alias { + format!("{} AS {}", relation.to_string(), alias) + } else { + relation.to_string() + } + } ASTNode::SQLSelect { projection, relation, @@ -366,21 +381,21 @@ impl ToString for SQLAssignment { #[derive(Debug, Clone, PartialEq)] pub struct SQLOrderByExpr { pub expr: Box, - pub asc: bool, + pub asc: Option, } impl SQLOrderByExpr { - pub fn new(expr: Box, asc: bool) -> Self { + pub fn new(expr: Box, asc: Option) -> Self { SQLOrderByExpr { expr, asc } } } impl ToString for SQLOrderByExpr { fn to_string(&self) -> String { - if self.asc { - format!("{} ASC", self.expr.as_ref().to_string()) - } else { - format!("{} DESC", 
self.expr.as_ref().to_string()) + match self.asc { + Some(true) => format!("{} ASC", self.expr.to_string()), + Some(false) => format!("{} DESC", self.expr.to_string()), + None => self.expr.to_string(), } } } @@ -388,7 +403,7 @@ impl ToString for SQLOrderByExpr { /// SQL column definition #[derive(Debug, Clone, PartialEq)] pub struct SQLColumnDef { - pub name: String, + pub name: SQLIdent, pub data_type: SQLType, pub is_primary: bool, pub is_unique: bool, @@ -417,7 +432,7 @@ impl ToString for SQLColumnDef { #[derive(Debug, Clone, PartialEq)] pub struct Join { - pub relation: ASTNode, + pub relation: ASTNode, // TableFactor pub join_operator: JoinOperator, } diff --git a/src/sqlast/table_key.rs b/src/sqlast/table_key.rs index 9dacc21b3..f4ff70f4b 100644 --- a/src/sqlast/table_key.rs +++ b/src/sqlast/table_key.rs @@ -1,3 +1,5 @@ +use super::SQLIdent; + #[derive(Debug, PartialEq, Clone)] pub enum AlterOperation { AddConstraint(TableKey), @@ -17,8 +19,8 @@ impl ToString for AlterOperation { #[derive(Debug, PartialEq, Clone)] pub struct Key { - pub name: String, - pub columns: Vec, + pub name: SQLIdent, + pub columns: Vec, } #[derive(Debug, PartialEq, Clone)] @@ -29,7 +31,7 @@ pub enum TableKey { ForeignKey { key: Key, foreign_table: String, - referred_columns: Vec, + referred_columns: Vec, }, } diff --git a/src/sqlparser.rs b/src/sqlparser.rs index 42a39b01e..33b950de5 100644 --- a/src/sqlparser.rs +++ b/src/sqlparser.rs @@ -14,6 +14,7 @@ //! SQL Parser +use super::dialect::keywords; use super::dialect::Dialect; use super::sqlast::*; use super::sqltokenizer::*; @@ -77,9 +78,7 @@ impl Parser { break; } - if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)? 
{ - expr = infix_expr; - } + expr = self.parse_infix(expr, next_precedence)?; } Ok(expr) } @@ -92,7 +91,7 @@ impl Parser { loop { // stop parsing on `NULL` | `NOT NULL` match self.peek_token() { - Some(Token::Keyword(ref k)) if k == "NOT" || k == "NULL" => break, + Some(Token::SQLWord(ref k)) if k.keyword == "NOT" || k.keyword == "NULL" => break, _ => {} } @@ -102,9 +101,7 @@ impl Parser { break; } - if let Some(infix_expr) = self.parse_infix(expr.clone(), next_precedence)? { - expr = infix_expr; - } + expr = self.parse_infix(expr, next_precedence)?; } Ok(expr) } @@ -113,7 +110,7 @@ impl Parser { pub fn parse_prefix(&mut self) -> Result { match self.next_token() { Some(t) => match t { - Token::Keyword(k) => match k.to_uppercase().as_ref() { + Token::SQLWord(w) => match w.keyword.as_ref() { "SELECT" => Ok(self.parse_select()?), "CREATE" => Ok(self.parse_create()?), "DELETE" => Ok(self.parse_delete()?), @@ -125,38 +122,31 @@ impl Parser { self.parse_sql_value() } "CASE" => self.parse_case_expression(), + "CAST" => self.parse_cast_expression(), "NOT" => Ok(ASTNode::SQLUnary { operator: SQLOperator::Not, expr: Box::new(self.parse_expr(0)?), }), - _ => return parser_err!(format!("No prefix parser for keyword {}", k)), - }, - Token::Mult => Ok(ASTNode::SQLWildcard), - Token::Identifier(id) => { - if "CAST" == id.to_uppercase() { - self.parse_cast_expression() - } else { - match self.peek_token() { - Some(Token::LParen) => self.parse_function(&id), - Some(Token::Period) => { - let mut id_parts: Vec = vec![id]; - while self.peek_token() == Some(Token::Period) { - self.expect_token(&Token::Period)?; - match self.next_token() { - Some(Token::Identifier(id)) => id_parts.push(id), - _ => { - return parser_err!(format!( - "Error parsing compound identifier" - )) - } + _ => match self.peek_token() { + Some(Token::LParen) => self.parse_function(&w.value), + Some(Token::Period) => { + let mut id_parts: Vec = vec![w.value]; + while self.consume_token(&Token::Period) { + match 
self.next_token() { + Some(Token::SQLWord(w)) => id_parts.push(w.value), + _ => { + return parser_err!(format!( + "Error parsing compound identifier" + )); } } - Ok(ASTNode::SQLCompoundIdentifier(id_parts)) } - _ => Ok(ASTNode::SQLIdentifier(id)), + Ok(ASTNode::SQLCompoundIdentifier(id_parts)) } - } - } + _ => Ok(ASTNode::SQLIdentifier(w.value)), + }, + }, + Token::Mult => Ok(ASTNode::SQLWildcard), Token::Number(_) | Token::SingleQuotedString(_) => { self.prev_token(); self.parse_sql_value() @@ -248,40 +238,36 @@ impl Parser { } /// Parse an expression infix (typically an operator) - pub fn parse_infix( - &mut self, - expr: ASTNode, - precedence: u8, - ) -> Result, ParserError> { + pub fn parse_infix(&mut self, expr: ASTNode, precedence: u8) -> Result { debug!("parsing infix"); match self.next_token() { Some(tok) => match tok { - Token::Keyword(ref k) if k == "IS" => { + Token::SQLWord(ref k) if k.keyword == "IS" => { if self.parse_keywords(vec!["NULL"]) { - Ok(Some(ASTNode::SQLIsNull(Box::new(expr)))) + Ok(ASTNode::SQLIsNull(Box::new(expr))) } else if self.parse_keywords(vec!["NOT", "NULL"]) { - Ok(Some(ASTNode::SQLIsNotNull(Box::new(expr)))) + Ok(ASTNode::SQLIsNotNull(Box::new(expr))) } else { parser_err!("Invalid tokens after IS") } } - Token::Keyword(ref k) if k == "NOT" => { + Token::SQLWord(ref k) if k.keyword == "NOT" => { if self.parse_keywords(vec!["LIKE"]) { - Ok(Some(ASTNode::SQLBinaryExpr { + Ok(ASTNode::SQLBinaryExpr { left: Box::new(expr), op: SQLOperator::NotLike, right: Box::new(self.parse_expr(precedence)?), - })) + }) } else { parser_err!("Invalid tokens after NOT") } } - Token::Keyword(_) => Ok(Some(ASTNode::SQLBinaryExpr { - left: Box::new(expr), - op: self.to_sql_operator(&tok)?, - right: Box::new(self.parse_expr(precedence)?), - })), - Token::Eq + Token::DoubleColon => { + let pg_cast = self.parse_pg_cast(expr)?; + Ok(pg_cast) + } + Token::SQLWord(_) + | Token::Eq | Token::Neq | Token::Gt | Token::GtEq @@ -291,18 +277,16 @@ impl Parser { | 
Token::Minus | Token::Mult | Token::Mod - | Token::Div => Ok(Some(ASTNode::SQLBinaryExpr { + | Token::Div => Ok(ASTNode::SQLBinaryExpr { left: Box::new(expr), op: self.to_sql_operator(&tok)?, right: Box::new(self.parse_expr(precedence)?), - })), - Token::DoubleColon => { - let pg_cast = self.parse_pg_cast(expr)?; - Ok(Some(pg_cast)) - } + }), _ => parser_err!(format!("No infix parser for token {:?}", tok)), }, - None => Ok(None), + // This is not supposed to happen, because of the precedence check + // in parse_expr. + None => parser_err!("Unexpected EOF in parse_infix"), } } @@ -320,10 +304,10 @@ impl Parser { &Token::Mult => Ok(SQLOperator::Multiply), &Token::Div => Ok(SQLOperator::Divide), &Token::Mod => Ok(SQLOperator::Modulus), - &Token::Keyword(ref k) if k == "AND" => Ok(SQLOperator::And), - &Token::Keyword(ref k) if k == "OR" => Ok(SQLOperator::Or), - //&Token::Keyword(ref k) if k == "NOT" => Ok(SQLOperator::Not), - &Token::Keyword(ref k) if k == "LIKE" => Ok(SQLOperator::Like), + &Token::SQLWord(ref k) if k.keyword == "AND" => Ok(SQLOperator::And), + &Token::SQLWord(ref k) if k.keyword == "OR" => Ok(SQLOperator::Or), + //&Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(SQLOperator::Not), + &Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(SQLOperator::Like), _ => parser_err!(format!("Unsupported SQL operator {:?}", tok)), } } @@ -342,11 +326,11 @@ impl Parser { debug!("get_precedence() {:?}", tok); match tok { - &Token::Keyword(ref k) if k == "OR" => Ok(5), - &Token::Keyword(ref k) if k == "AND" => Ok(10), - &Token::Keyword(ref k) if k == "NOT" => Ok(15), - &Token::Keyword(ref k) if k == "IS" => Ok(15), - &Token::Keyword(ref k) if k == "LIKE" => Ok(20), + &Token::SQLWord(ref k) if k.keyword == "OR" => Ok(5), + &Token::SQLWord(ref k) if k.keyword == "AND" => Ok(10), + &Token::SQLWord(ref k) if k.keyword == "NOT" => Ok(15), + &Token::SQLWord(ref k) if k.keyword == "IS" => Ok(15), + &Token::SQLWord(ref k) if k.keyword == "LIKE" => Ok(20), &Token::Eq | 
&Token::Lt | &Token::LtEq | &Token::Neq | &Token::Gt | &Token::GtEq => { Ok(20) } @@ -445,13 +429,9 @@ impl Parser { #[must_use] pub fn parse_keyword(&mut self, expected: &'static str) -> bool { match self.peek_token() { - Some(Token::Keyword(k)) => { - if expected.eq_ignore_ascii_case(k.as_str()) { - self.next_token(); - true - } else { - false - } + Some(Token::SQLWord(ref k)) if expected.eq_ignore_ascii_case(&k.keyword) => { + self.next_token(); + true } _ => false, } @@ -522,7 +502,7 @@ impl Parser { let mut columns = vec![]; if self.consume_token(&Token::LParen) { loop { - if let Some(Token::Identifier(column_name)) = self.next_token() { + if let Some(Token::SQLWord(column_name)) = self.next_token() { if let Ok(data_type) = self.parse_data_type() { let is_primary = self.parse_keywords(vec!["PRIMARY", "KEY"]); let is_unique = self.parse_keyword("UNIQUE"); @@ -545,7 +525,7 @@ impl Parser { Some(Token::Comma) => { self.next_token(); columns.push(SQLColumnDef { - name: column_name, + name: column_name.value, data_type: data_type, allow_null, is_primary, @@ -556,7 +536,7 @@ impl Parser { Some(Token::RParen) => { self.next_token(); columns.push(SQLColumnDef { - name: column_name, + name: column_name.value, data_type: data_type, allow_null, is_primary, @@ -610,19 +590,16 @@ impl Parser { } else if is_unique_key { Ok(TableKey::UniqueKey(key)) } else if is_foreign_key { - if self.parse_keyword("REFERENCES") { - let foreign_table = self.parse_tablename()?; - self.expect_token(&Token::LParen)?; - let referred_columns = self.parse_column_names()?; - self.expect_token(&Token::RParen)?; - Ok(TableKey::ForeignKey { - key, - foreign_table, - referred_columns, - }) - } else { - parser_err!("Expecting references") - } + self.expect_keyword("REFERENCES")?; + let foreign_table = self.parse_tablename()?; + self.expect_token(&Token::LParen)?; + let referred_columns = self.parse_column_names()?; + self.expect_token(&Token::RParen)?; + Ok(TableKey::ForeignKey { + key, + 
foreign_table, + referred_columns, + }) } else { parser_err!(format!( "Expecting primary key, unique key, or foreign key, found: {:?}", @@ -632,39 +609,33 @@ impl Parser { } pub fn parse_alter(&mut self) -> Result { - if self.parse_keyword("TABLE") { - let _ = self.parse_keyword("ONLY"); - let table_name = self.parse_tablename()?; - let operation: Result = - if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) { - match self.next_token() { - Some(Token::Identifier(ref id)) => { - let table_key = self.parse_table_key(id)?; - Ok(AlterOperation::AddConstraint(table_key)) - } - _ => { - return parser_err!(format!( - "Expecting identifier, found : {:?}", - self.peek_token() - )); - } + self.expect_keyword("TABLE")?; + let _ = self.parse_keyword("ONLY"); + let table_name = self.parse_tablename()?; + let operation: Result = + if self.parse_keywords(vec!["ADD", "CONSTRAINT"]) { + match self.next_token() { + Some(Token::SQLWord(ref id)) => { + let table_key = self.parse_table_key(&id.value)?; + Ok(AlterOperation::AddConstraint(table_key)) } - } else { - return parser_err!(format!( - "Expecting ADD CONSTRAINT, found :{:?}", - self.peek_token() - )); - }; - Ok(ASTNode::SQLAlterTable { - name: table_name, - operation: operation?, - }) - } else { - parser_err!(format!( - "Expecting TABLE after ALTER, found {:?}", - self.peek_token() - )) - } + _ => { + return parser_err!(format!( + "Expecting identifier, found : {:?}", + self.peek_token() + )); + } + } + } else { + return parser_err!(format!( + "Expecting ADD CONSTRAINT, found :{:?}", + self.peek_token() + )); + }; + Ok(ASTNode::SQLAlterTable { + name: table_name, + operation: operation?, + }) } /// Parse a copy statement @@ -717,8 +688,10 @@ impl Parser { return Ok(values); } if let Some(token) = self.next_token() { - if token == Token::Identifier("N".to_string()) { - values.push(None); + if let Token::SQLWord(SQLWord { value: v, .. 
}) = token { + if v == "N" { + values.push(None); + } } } else { continue; @@ -737,11 +710,16 @@ impl Parser { match self.next_token() { Some(t) => { match t { - Token::Keyword(k) => match k.to_uppercase().as_ref() { + Token::SQLWord(k) => match k.keyword.as_ref() { "TRUE" => Ok(Value::Boolean(true)), "FALSE" => Ok(Value::Boolean(false)), "NULL" => Ok(Value::Null), - _ => return parser_err!(format!("No value parser for keyword {}", k)), + _ => { + return parser_err!(format!( + "No value parser for keyword {}", + k.keyword + )); + } }, //TODO: parse the timestamp here (see parse_timestamp_value()) Token::Number(ref n) if n.contains(".") => match n.parse::() { @@ -873,7 +851,7 @@ impl Parser { /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) pub fn parse_data_type(&mut self) -> Result { match self.next_token() { - Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() { + Some(Token::SQLWord(k)) => match k.keyword.as_ref() { "BOOLEAN" => Ok(SQLType::Boolean), "FLOAT" => Ok(SQLType::Float(self.parse_optional_precision()?)), "REAL" => Ok(SQLType::Real), @@ -958,64 +936,92 @@ impl Parser { let (precision, scale) = self.parse_optional_precision_scale()?; Ok(SQLType::Decimal(precision, scale)) } - _ => parser_err!(format!("Invalid data type '{:?}'", k)), + _ => { + self.prev_token(); + let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type + Ok(SQLType::Custom(type_name)) + } }, - Some(Token::Identifier(_)) => { + other => parser_err!(format!("Invalid data type: '{:?}'", other)), + } + } + + /// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword) + /// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`, + /// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) 
AS bar` + pub fn parse_optional_alias( + &mut self, + reserved_kwds: &[&str], + ) -> Result, ParserError> { + let after_as = self.parse_keyword("AS"); + let maybe_alias = self.next_token(); + match maybe_alias { + // Accept any identifier after `AS` (though many dialects have restrictions on + // keywords that may appear here). If there's no `AS`: don't parse keywords, + // which may start a construct allowed in this position, to be parsed as aliases. + // (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword, + // not an alias.) + Some(Token::SQLWord(ref w)) + if after_as || !reserved_kwds.contains(&w.keyword.as_str()) => + { + // have to clone here until #![feature(bind_by_move_pattern_guards)] is enabled by default + Ok(Some(w.value.clone())) + } + ref not_an_ident if after_as => parser_err!(format!( + "Expected an identifier after AS, got {:?}", + not_an_ident + )), + Some(_not_an_ident) => { self.prev_token(); - let type_name = self.parse_tablename()?; // TODO: this actually reads a possibly schema-qualified name of a (custom) type - Ok(SQLType::Custom(type_name)) + Ok(None) // no alias found } - other => parser_err!(format!("Invalid data type: '{:?}'", other)), + None => Ok(None), } } + /// Parse one or more identifiers with the specified separator between them pub fn parse_compound_identifier(&mut self, separator: &Token) -> Result { let mut idents = vec![]; let mut expect_identifier = true; loop { let token = &self.next_token(); match token { - Some(token) => match token { - Token::Identifier(s) => { - if expect_identifier { - expect_identifier = false; - idents.push(s.to_string()); - } else { - self.prev_token(); - break; - } - } - token if token == separator => { - if expect_identifier { - return parser_err!(format!("Expecting identifier, found {:?}", token)); - } else { - expect_identifier = true; - continue; - } - } - _ => { + Some(Token::SQLWord(s)) if expect_identifier => { + expect_identifier = false; + 
idents.push(s.to_string()); + } + Some(token) if token == separator && !expect_identifier => { + expect_identifier = true; + continue; + } + _ => { + if token.is_some() { self.prev_token(); - break; } - }, - None => { - self.prev_token(); break; } } } - Ok(ASTNode::SQLCompoundIdentifier(idents)) + if expect_identifier { + parser_err!(format!( + "Expecting identifier, found {:?}", + self.peek_token() + )) + } else { + Ok(ASTNode::SQLCompoundIdentifier(idents)) + } } pub fn parse_tablename(&mut self) -> Result { let identifier = self.parse_compound_identifier(&Token::Period)?; match identifier { + // TODO: should store the compound identifier itself ASTNode::SQLCompoundIdentifier(idents) => Ok(idents.join(".")), other => parser_err!(format!("Expecting compound identifier, found: {:?}", other)), } } - pub fn parse_column_names(&mut self) -> Result, ParserError> { + pub fn parse_column_names(&mut self) -> Result, ParserError> { let identifier = self.parse_compound_identifier(&Token::Comma)?; match identifier { ASTNode::SQLCompoundIdentifier(idents) => Ok(idents), @@ -1090,7 +1096,7 @@ impl Parser { let projection = self.parse_expr_list()?; let (relation, joins): (Option>, Vec) = if self.parse_keyword("FROM") { - let relation = Some(Box::new(self.parse_expr(0)?)); + let relation = Some(Box::new(self.parse_table_factor()?)); let joins = self.parse_joins()?; (relation, joins) } else { @@ -1149,6 +1155,21 @@ impl Parser { } } + /// A table name or a parenthesized subquery, followed by optional `[AS] alias` + pub fn parse_table_factor(&mut self) -> Result { + let relation = if self.consume_token(&Token::LParen) { + self.prev_token(); + self.parse_expr(0)? + } else { + self.parse_compound_identifier(&Token::Period)? 
+ }; + let alias = self.parse_optional_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(ASTNode::TableFactor { + relation: Box::new(relation), + alias, + }) + } + fn parse_join_constraint(&mut self, natural: bool) -> Result { if natural { Ok(JoinConstraint::Natural) @@ -1156,26 +1177,20 @@ impl Parser { let constraint = self.parse_expr(0)?; Ok(JoinConstraint::On(constraint)) } else if self.parse_keyword("USING") { - if self.consume_token(&Token::LParen) { - let attributes = self - .parse_expr_list()? - .into_iter() - .map(|ast_node| match ast_node { - ASTNode::SQLIdentifier(ident) => Ok(ident), - unexpected => { - parser_err!(format!("Expected identifier, found {:?}", unexpected)) - } - }) - .collect::, ParserError>>()?; + self.expect_token(&Token::LParen)?; + let attributes = self + .parse_expr_list()? + .into_iter() + .map(|ast_node| match ast_node { + ASTNode::SQLIdentifier(ident) => Ok(ident), + unexpected => { + parser_err!(format!("Expected identifier, found {:?}", unexpected)) + } + }) + .collect::, ParserError>>()?; - if self.consume_token(&Token::RParen) { - Ok(JoinConstraint::Using(attributes)) - } else { - parser_err!(format!("Expected token ')', found {:?}", self.peek_token())) - } - } else { - parser_err!(format!("Expected token '(', found {:?}", self.peek_token())) - } + self.expect_token(&Token::RParen)?; + Ok(JoinConstraint::Using(attributes)) } else { parser_err!(format!( "Unexpected token after JOIN: {:?}", @@ -1190,7 +1205,7 @@ impl Parser { let natural = match &self.peek_token() { Some(Token::Comma) => { self.next_token(); - let relation = self.parse_expr(0)?; + let relation = self.parse_table_factor()?; let join = Join { relation, join_operator: JoinOperator::Implicit, @@ -1198,10 +1213,10 @@ impl Parser { joins.push(join); continue; } - Some(Token::Keyword(kw)) if kw == "CROSS" => { + Some(Token::SQLWord(kw)) if kw.keyword == "CROSS" => { self.next_token(); self.expect_keyword("JOIN")?; - let relation = self.parse_expr(0)?; + let relation = 
self.parse_table_factor()?; let join = Join { relation, join_operator: JoinOperator::Cross, @@ -1209,7 +1224,7 @@ impl Parser { joins.push(join); continue; } - Some(Token::Keyword(kw)) if kw == "NATURAL" => { + Some(Token::SQLWord(kw)) if kw.keyword == "NATURAL" => { self.next_token(); true } @@ -1218,49 +1233,49 @@ impl Parser { }; let join = match &self.peek_token() { - Some(Token::Keyword(kw)) if kw == "INNER" => { + Some(Token::SQLWord(kw)) if kw.keyword == "INNER" => { self.next_token(); self.expect_keyword("JOIN")?; Join { - relation: self.parse_expr(0)?, + relation: self.parse_table_factor()?, join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?), } } - Some(Token::Keyword(kw)) if kw == "JOIN" => { + Some(Token::SQLWord(kw)) if kw.keyword == "JOIN" => { self.next_token(); Join { - relation: self.parse_expr(0)?, + relation: self.parse_table_factor()?, join_operator: JoinOperator::Inner(self.parse_join_constraint(natural)?), } } - Some(Token::Keyword(kw)) if kw == "LEFT" => { + Some(Token::SQLWord(kw)) if kw.keyword == "LEFT" => { self.next_token(); let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; Join { - relation: self.parse_expr(0)?, + relation: self.parse_table_factor()?, join_operator: JoinOperator::LeftOuter( self.parse_join_constraint(natural)?, ), } } - Some(Token::Keyword(kw)) if kw == "RIGHT" => { + Some(Token::SQLWord(kw)) if kw.keyword == "RIGHT" => { self.next_token(); let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; Join { - relation: self.parse_expr(0)?, + relation: self.parse_table_factor()?, join_operator: JoinOperator::RightOuter( self.parse_join_constraint(natural)?, ), } } - Some(Token::Keyword(kw)) if kw == "FULL" => { + Some(Token::SQLWord(kw)) if kw.keyword == "FULL" => { self.next_token(); let _ = self.parse_keyword("OUTER"); self.expect_keyword("JOIN")?; Join { - relation: self.parse_expr(0)?, + relation: self.parse_table_factor()?, join_operator: JoinOperator::FullOuter( 
self.parse_join_constraint(natural)?, ), @@ -1321,33 +1336,19 @@ impl Parser { loop { let expr = self.parse_expr(0)?; - // look for optional ASC / DESC specifier - let asc = match self.peek_token() { - Some(Token::Keyword(k)) => match k.to_uppercase().as_ref() { - "ASC" => { - self.next_token(); - true - } - "DESC" => { - self.next_token(); - false - } - _ => true, - }, - Some(Token::Comma) => true, - _ => true, + let asc = if self.parse_keyword("ASC") { + Some(true) + } else if self.parse_keyword("DESC") { + Some(false) + } else { + None }; expr_list.push(SQLOrderByExpr::new(Box::new(expr), asc)); - if let Some(t) = self.peek_token() { - if t == Token::Comma { - self.next_token(); - } else { - break; - } + if let Some(Token::Comma) = self.peek_token() { + self.next_token(); } else { - // EOF break; } } diff --git a/src/sqltokenizer.rs b/src/sqltokenizer.rs index 504088227..0095e5058 100644 --- a/src/sqltokenizer.rs +++ b/src/sqltokenizer.rs @@ -21,23 +21,20 @@ use std::iter::Peekable; use std::str::Chars; +use super::dialect::keywords::ALL_KEYWORDS; use super::dialect::Dialect; /// SQL Token enumeration #[derive(Debug, Clone, PartialEq)] pub enum Token { - /// SQL identifier e.g. table or column name - Identifier(String), - /// SQL keyword e.g. 
Keyword("SELECT") - Keyword(String), + /// A keyword (like SELECT) or an optionally quoted SQL identifier + SQLWord(SQLWord), /// Numeric literal Number(String), /// A character that could not be tokenized Char(char), /// Single quoted string: i.e: 'string' SingleQuotedString(String), - /// Double quoted string: i.e: "string" - DoubleQuotedString(String), /// Comma Comma, /// Whitespace (space, tab, etc) @@ -93,12 +90,10 @@ pub enum Token { impl ToString for Token { fn to_string(&self) -> String { match self { - Token::Identifier(ref id) => id.to_string(), - Token::Keyword(ref k) => k.to_string(), + Token::SQLWord(ref w) => w.to_string(), Token::Number(ref n) => n.to_string(), Token::Char(ref c) => c.to_string(), Token::SingleQuotedString(ref s) => format!("'{}'", s), - Token::DoubleQuotedString(ref s) => format!("\"{}\"", s), Token::Comma => ",".to_string(), Token::Whitespace(ws) => ws.to_string(), Token::Eq => "=".to_string(), @@ -128,6 +123,54 @@ impl ToString for Token { } } +impl Token { + pub fn make_keyword(keyword: &str) -> Self { + Token::make_word(keyword, None) + } + pub fn make_word(word: &str, quote_style: Option) -> Self { + let word_uppercase = word.to_uppercase(); + //TODO: need to reintroduce FnvHashSet at some point .. 
iterating over keywords is + // not fast but I want the simplicity for now while I experiment with pluggable + // dialects + let is_keyword = quote_style == None && ALL_KEYWORDS.contains(&word_uppercase.as_str()); + Token::SQLWord(SQLWord { + value: word.to_string(), + quote_style: quote_style, + keyword: if is_keyword { + word_uppercase.to_string() + } else { + "".to_string() + }, + }) + } +} + +/// A keyword (like SELECT) or an optionally quoted SQL identifier +#[derive(Debug, Clone, PartialEq)] +pub struct SQLWord { + /// The value of the token, without the enclosing quotes, and with the + /// escape sequences (if any) processed (TODO: escapes are not handled) + pub value: String, + /// An identifier can be "quoted" (<delimited identifier> in ANSI parlance). + /// The standard and most implementations allow using double quotes for this, + /// but some implementations support other quoting styles as well (e.g. \[MS SQL]) + pub quote_style: Option, + /// If the word was not quoted and it matched one of the known keywords, + /// this will have one of the values from dialect::keywords, otherwise empty + pub keyword: String, +} + +impl ToString for SQLWord { + fn to_string(&self) -> String { + match self.quote_style { + Some('"') => format!("\"{}\"", self.value), + Some('[') => format!("[{}]", self.value), + None => self.value.clone(), + _ => panic!("Unexpected quote_style!"), + } + } +} + #[derive(Debug, Clone, PartialEq)] pub enum Whitespace { Space, @@ -168,13 +211,6 @@ impl<'a> Tokenizer<'a> { } } - fn is_keyword(&self, s: &str) -> bool { - //TODO: need to reintroduce FnvHashSet at some point .. 
iterating over keywords is - // not fast but I want the simplicity for now while I experiment with pluggable - // dialects - return self.dialect.keywords().contains(&s); - } - /// Tokenize the statement and produce a vector of tokens pub fn tokenize(&mut self) -> Result, TokenizerError> { let mut peekable = self.query.chars().peekable(); @@ -189,11 +225,10 @@ impl<'a> Tokenizer<'a> { } Token::Whitespace(Whitespace::Tab) => self.col += 4, - Token::Identifier(s) => self.col += s.len() as u64, - Token::Keyword(s) => self.col += s.len() as u64, + Token::SQLWord(w) if w.quote_style == None => self.col += w.value.len() as u64, + Token::SQLWord(w) if w.quote_style != None => self.col += w.value.len() as u64 + 2, Token::Number(s) => self.col += s.len() as u64, Token::SingleQuotedString(s) => self.col += s.len() as u64, - Token::DoubleQuotedString(s) => self.col += s.len() as u64, _ => self.col += 1, } @@ -232,16 +267,12 @@ impl<'a> Tokenizer<'a> { break; } } - let upper_str = s.to_uppercase(); - if self.is_keyword(upper_str.as_str()) { - Ok(Some(Token::Keyword(upper_str))) - } else { - Ok(Some(Token::Identifier(s))) - } + Ok(Some(Token::make_word(&s, None))) } // string '\'' => { //TODO: handle escaped quotes in string + //TODO: handle newlines in string //TODO: handle EOF before terminating quote let mut s = String::new(); chars.next(); // consume @@ -275,7 +306,7 @@ impl<'a> Tokenizer<'a> { } } } - Ok(Some(Token::DoubleQuotedString(s))) + Ok(Some(Token::make_word(&s, Some('"')))) } // numbers '0'...'9' => { @@ -389,7 +420,7 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Keyword(String::from("SELECT")), + Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), Token::Number(String::from("1")), ]; @@ -405,9 +436,9 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Keyword(String::from("SELECT")), + Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), - 
Token::Identifier(String::from("sqrt")), + Token::make_word("sqrt", None), Token::LParen, Token::Number(String::from("1")), Token::RParen, @@ -424,23 +455,23 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Keyword(String::from("SELECT")), + Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), Token::Mult, Token::Whitespace(Whitespace::Space), - Token::Keyword(String::from("FROM")), + Token::make_keyword("FROM"), Token::Whitespace(Whitespace::Space), - Token::Identifier(String::from("customer")), + Token::make_word("customer", None), Token::Whitespace(Whitespace::Space), - Token::Keyword(String::from("WHERE")), + Token::make_keyword("WHERE"), Token::Whitespace(Whitespace::Space), - Token::Identifier(String::from("id")), + Token::make_word("id", None), Token::Whitespace(Whitespace::Space), Token::Eq, Token::Whitespace(Whitespace::Space), Token::Number(String::from("1")), Token::Whitespace(Whitespace::Space), - Token::Keyword(String::from("LIMIT")), + Token::make_keyword("LIMIT"), Token::Whitespace(Whitespace::Space), Token::Number(String::from("5")), ]; @@ -456,17 +487,17 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Keyword(String::from("SELECT")), + Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), Token::Mult, Token::Whitespace(Whitespace::Space), - Token::Keyword(String::from("FROM")), + Token::make_keyword("FROM"), Token::Whitespace(Whitespace::Space), - Token::Identifier(String::from("customer")), + Token::make_word("customer", None), Token::Whitespace(Whitespace::Space), - Token::Keyword(String::from("WHERE")), + Token::make_keyword("WHERE"), Token::Whitespace(Whitespace::Space), - Token::Identifier(String::from("salary")), + Token::make_word("salary", None), Token::Whitespace(Whitespace::Space), Token::Neq, Token::Whitespace(Whitespace::Space), @@ -491,7 +522,7 @@ mod tests { Token::Char('ط'), Token::Char('ف'), Token::Char('ى'), - 
Token::Identifier("h".to_string()), + Token::make_word("h", None), ]; compare(expected, tokens); } @@ -507,20 +538,20 @@ mod tests { let expected = vec![ Token::Whitespace(Whitespace::Newline), Token::Whitespace(Whitespace::Newline), - Token::Keyword("SELECT".into()), + Token::make_keyword("SELECT"), Token::Whitespace(Whitespace::Space), Token::Mult, Token::Whitespace(Whitespace::Space), - Token::Keyword("FROM".into()), + Token::make_keyword("FROM"), Token::Whitespace(Whitespace::Space), - Token::Keyword("TABLE".into()), + Token::make_keyword("table"), Token::Whitespace(Whitespace::Tab), Token::Char('م'), Token::Char('ص'), Token::Char('ط'), Token::Char('ف'), Token::Char('ى'), - Token::Identifier("h".to_string()), + Token::make_word("h", None), ]; compare(expected, tokens); } @@ -533,11 +564,11 @@ mod tests { let tokens = tokenizer.tokenize().unwrap(); let expected = vec![ - Token::Identifier(String::from("a")), + Token::make_word("a", None), Token::Whitespace(Whitespace::Space), - Token::Keyword("IS".to_string()), + Token::make_keyword("IS"), Token::Whitespace(Whitespace::Space), - Token::Keyword("NULL".to_string()), + Token::make_keyword("NULL"), ]; compare(expected, tokens); diff --git a/tests/sqlparser_generic.rs b/tests/sqlparser_generic.rs index 5c8679724..c57e1d53c 100644 --- a/tests/sqlparser_generic.rs +++ b/tests/sqlparser_generic.rs @@ -229,27 +229,33 @@ fn parse_not_like() { #[test] fn parse_select_order_by() { - let sql = String::from( - "SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC", - ); - match verified(&sql) { - ASTNode::SQLSelect { order_by, .. } => { - assert_eq!( - Some(vec![ - SQLOrderByExpr { - expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())), - asc: true, - }, - SQLOrderByExpr { - expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())), - asc: false, - }, - ]), - order_by - ); + fn chk(sql: &str) { + match verified(&sql) { + ASTNode::SQLSelect { order_by, .. 
} => { + assert_eq!( + Some(vec![ + SQLOrderByExpr { + expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())), + asc: Some(true), + }, + SQLOrderByExpr { + expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())), + asc: Some(false), + }, + SQLOrderByExpr { + expr: Box::new(ASTNode::SQLIdentifier("id".to_string())), + asc: None, + }, + ]), + order_by + ); + } + _ => assert!(false), } - _ => assert!(false), } + chk("SELECT id, fname, lname FROM customer WHERE id < 5 ORDER BY lname ASC, fname DESC, id"); + // make sure ORDER is not treated as an alias + chk("SELECT id, fname, lname FROM customer ORDER BY lname ASC, fname DESC, id"); } #[test] @@ -266,11 +272,11 @@ fn parse_select_order_by_limit() { Some(vec![ SQLOrderByExpr { expr: Box::new(ASTNode::SQLIdentifier("lname".to_string())), - asc: true, + asc: Some(true), }, SQLOrderByExpr { expr: Box::new(ASTNode::SQLIdentifier("fname".to_string())), - asc: false, + asc: Some(false), }, ]), order_by @@ -535,7 +541,10 @@ fn parse_implicit_join() { assert_eq!( joins[0], Join { - relation: ASTNode::SQLIdentifier("t2".to_string()), + relation: ASTNode::TableFactor { + relation: Box::new(ASTNode::SQLCompoundIdentifier(vec!["t2".to_string()])), + alias: None, + }, join_operator: JoinOperator::Implicit } ) @@ -554,7 +563,10 @@ fn parse_cross_join() { assert_eq!( joins[0], Join { - relation: ASTNode::SQLIdentifier("t2".to_string()), + relation: ASTNode::TableFactor { + relation: Box::new(ASTNode::SQLCompoundIdentifier(vec!["t2".to_string()])), + alias: None, + }, join_operator: JoinOperator::Cross } ) @@ -567,10 +579,14 @@ fn parse_cross_join() { fn parse_joins_on() { fn join_with_constraint( relation: impl Into, + alias: Option, f: impl Fn(JoinConstraint) -> JoinOperator, ) -> Join { Join { - relation: ASTNode::SQLIdentifier(relation.into()), + relation: ASTNode::TableFactor { + relation: Box::new(ASTNode::SQLCompoundIdentifier(vec![relation.into()])), + alias, + }, join_operator: 
f(JoinConstraint::On(ASTNode::SQLBinaryExpr { left: Box::new(ASTNode::SQLIdentifier("c1".into())), op: SQLOperator::Eq, @@ -578,21 +594,35 @@ fn parse_joins_on() { })), } } + // Test parsing of aliases + assert_eq!( + joins_from(verified("SELECT * FROM t1 JOIN t2 AS foo ON c1 = c2")), + vec![join_with_constraint( + "t2", + Some("foo".to_string()), + JoinOperator::Inner + )] + ); + parses_to( + "SELECT * FROM t1 JOIN t2 foo ON c1 = c2", + "SELECT * FROM t1 JOIN t2 AS foo ON c1 = c2", + ); + // Test parsing of different join operators assert_eq!( joins_from(verified("SELECT * FROM t1 JOIN t2 ON c1 = c2")), - vec![join_with_constraint("t2", JoinOperator::Inner)] + vec![join_with_constraint("t2", None, JoinOperator::Inner)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 LEFT JOIN t2 ON c1 = c2")), - vec![join_with_constraint("t2", JoinOperator::LeftOuter)] + vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 RIGHT JOIN t2 ON c1 = c2")), - vec![join_with_constraint("t2", JoinOperator::RightOuter)] + vec![join_with_constraint("t2", None, JoinOperator::RightOuter)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 FULL JOIN t2 ON c1 = c2")), - vec![join_with_constraint("t2", JoinOperator::FullOuter)] + vec![join_with_constraint("t2", None, JoinOperator::FullOuter)] ); } @@ -600,29 +630,46 @@ fn parse_joins_on() { fn parse_joins_using() { fn join_with_constraint( relation: impl Into, + alias: Option, f: impl Fn(JoinConstraint) -> JoinOperator, ) -> Join { Join { - relation: ASTNode::SQLIdentifier(relation.into()), + relation: ASTNode::TableFactor { + relation: Box::new(ASTNode::SQLCompoundIdentifier(vec![relation.into()])), + alias, + }, join_operator: f(JoinConstraint::Using(vec!["c1".into()])), } } - + // Test parsing of aliases + assert_eq!( + joins_from(verified("SELECT * FROM t1 JOIN t2 AS foo USING(c1)")), + vec![join_with_constraint( + "t2", + Some("foo".to_string()), + JoinOperator::Inner + 
)] + ); + parses_to( + "SELECT * FROM t1 JOIN t2 foo USING(c1)", + "SELECT * FROM t1 JOIN t2 AS foo USING(c1)", + ); + // Test parsing of different join operators assert_eq!( joins_from(verified("SELECT * FROM t1 JOIN t2 USING(c1)")), - vec![join_with_constraint("t2", JoinOperator::Inner)] + vec![join_with_constraint("t2", None, JoinOperator::Inner)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 LEFT JOIN t2 USING(c1)")), - vec![join_with_constraint("t2", JoinOperator::LeftOuter)] + vec![join_with_constraint("t2", None, JoinOperator::LeftOuter)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 RIGHT JOIN t2 USING(c1)")), - vec![join_with_constraint("t2", JoinOperator::RightOuter)] + vec![join_with_constraint("t2", None, JoinOperator::RightOuter)] ); assert_eq!( joins_from(verified("SELECT * FROM t1 FULL JOIN t2 USING(c1)")), - vec![join_with_constraint("t2", JoinOperator::FullOuter)] + vec![join_with_constraint("t2", None, JoinOperator::FullOuter)] ); } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 6b6598c67..11b8cb5f2 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -13,20 +13,11 @@ fn test_prev_index() { let sql: &str = "SELECT version()"; let mut parser = parser(sql); assert_eq!(parser.prev_token(), None); - assert_eq!(parser.next_token(), Some(Token::Keyword("SELECT".into()))); - assert_eq!( - parser.next_token(), - Some(Token::Identifier("version".into())) - ); - assert_eq!( - parser.prev_token(), - Some(Token::Identifier("version".into())) - ); - assert_eq!( - parser.peek_token(), - Some(Token::Identifier("version".into())) - ); - assert_eq!(parser.prev_token(), Some(Token::Keyword("SELECT".into()))); + assert_eq!(parser.next_token(), Some(Token::make_keyword("SELECT"))); + assert_eq!(parser.next_token(), Some(Token::make_word("version", None))); + assert_eq!(parser.prev_token(), Some(Token::make_word("version", None))); + assert_eq!(parser.peek_token(), Some(Token::make_word("version", 
None))); + assert_eq!(parser.prev_token(), Some(Token::make_keyword("SELECT"))); assert_eq!(parser.prev_token(), None); } @@ -112,6 +103,13 @@ fn parse_invalid_table_name() { assert!(ast.is_err()); } +#[test] +fn parse_no_table_name() { + let mut parser = parser(""); + let ast = parser.parse_tablename(); + assert!(ast.is_err()); +} + #[test] fn parse_insert_with_columns() { let sql = String::from("INSERT INTO public.customer (id, name, active) VALUES(1, 2, 3)"); From 311f2ab429e103abede7a49c3589fc25f07d25cb Mon Sep 17 00:00:00 2001 From: Nickolay Ponomarev Date: Tue, 2 Apr 2019 09:46:11 +0300 Subject: [PATCH 45/45] bump version to 0.3.0, as requested in the PR --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a6b5eab4f..9760b81ed 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.2.5-alpha.0" +version = "0.3.0" authors = ["Andy Grove "] homepage = "https://github.com/andygrove/sqlparser-rs" documentation = "https://docs.rs/sqlparser/"