Skip to content

Commit 48b44ba

Browse files
committed
Separate statement from expr parsing (4/?)
Continuing from https://github.com/andygrove/sqlparser-rs/pull/33#issuecomment-453060427 This stops the parser from accepting (and the AST from being able to represent) SQL look-alike code that makes no sense, e.g. `SELECT ... FROM (CREATE TABLE ...) foo` or `SELECT ... FROM (1+CAST(...)) foo`. Generally this makes the AST less "partially typed", meaning that certain parts are strongly typed (e.g. SELECT can only contain projections, relations, etc.), while everything that didn't get its own type is dumped into ASTNode, effectively untyped. After a few more fixes (yet to be implemented), `ASTNode` could become an `SQLExpression`. The Pratt-style expression parser (returning an SQLExpression) would be invoked from the top-down parser in places where a generic expression is expected (e.g. after SELECT <...>, WHERE <...>, etc.), while things like select's `projection` and `relation` could be more appropriately (narrowly) typed. Since the diff is quite large due to the necessarily large number of mechanical changes, here's an overview: 1) Interface changes: - A new AST enum - `SQLStatement` - is split out of ASTNode: - The variants of the ASTNode enum that _only_ make sense as a top-level statement (INSERT, UPDATE, DELETE, CREATE, ALTER, COPY) are _moved_ to the new enum, with no other changes. - SQLSelect is _duplicated_: now available both as a variant in SQLStatement::SQLSelect (top-level SELECT) and ASTNode::SQLSelect (subquery). - The main entry point (Parser::parse_sql) now expects an SQL statement as input, and returns an `SQLStatement`. 2) Parser changes: instead of detecting the top-level constructs deep down in the precedence parser (`parse_prefix`) we are able to do it right after setting up the parser in the `parse_sql` entry point (SELECT, again, is kept in the expression parser to demonstrate how subqueries could be implemented). The rest of the parser changes are mechanical ASTNode -> SQLStatement replacements resulting from the AST change. 
3) Testing changes: for every test - depending on whether the input was a complete statement or an expression - I used an appropriate helper function: - `verified` (parses SQL, checks that it round-trips, and returns the AST) - was replaced by `verified_stmt` or `verified_expr`. - `parse_sql` (which returned AST without checking it round-tripped) was replaced by: - `parse_sql_expr` (same function, for expressions) - `one_statement_parses_to` (formerly `parses_to`), extended to deal with statements that are not expected to round-trip. The weird name is to reduce further churn when implementing multi-statement parsing. - `verified_stmt` (in 4 test cases that actually round-tripped)
1 parent e952be8 commit 48b44ba

File tree

5 files changed

+239
-193
lines changed

5 files changed

+239
-193
lines changed

src/sqlast/mod.rs

+67-52
Original file line numberDiff line numberDiff line change
@@ -79,52 +79,6 @@ pub enum ASTNode {
7979
},
8080
/// SELECT
8181
SQLSelect(SQLSelect),
82-
/// INSERT
83-
SQLInsert {
84-
/// TABLE
85-
table_name: String,
86-
/// COLUMNS
87-
columns: Vec<SQLIdent>,
88-
/// VALUES (vector of rows to insert)
89-
values: Vec<Vec<ASTNode>>,
90-
},
91-
SQLCopy {
92-
/// TABLE
93-
table_name: String,
94-
/// COLUMNS
95-
columns: Vec<SQLIdent>,
96-
/// VALUES a vector of values to be copied
97-
values: Vec<Option<String>>,
98-
},
99-
/// UPDATE
100-
SQLUpdate {
101-
/// TABLE
102-
table_name: String,
103-
/// Column assignments
104-
assignments: Vec<SQLAssignment>,
105-
/// WHERE
106-
selection: Option<Box<ASTNode>>,
107-
},
108-
/// DELETE
109-
SQLDelete {
110-
/// FROM
111-
relation: Option<Box<ASTNode>>,
112-
/// WHERE
113-
selection: Option<Box<ASTNode>>,
114-
},
115-
/// CREATE TABLE
116-
SQLCreateTable {
117-
/// Table name
118-
name: String,
119-
/// Optional schema
120-
columns: Vec<SQLColumnDef>,
121-
},
122-
/// ALTER TABLE
123-
SQLAlterTable {
124-
/// Table name
125-
name: String,
126-
operation: AlterOperation,
127-
},
12882
}
12983

13084
impl ToString for ASTNode {
@@ -186,7 +140,68 @@ impl ToString for ASTNode {
186140
}
187141
}
188142
ASTNode::SQLSelect(s) => s.to_string(),
189-
ASTNode::SQLInsert {
143+
}
144+
}
145+
}
146+
147+
/// A top-level statement (SELECT, INSERT, CREATE, etc.)
148+
#[derive(Debug, Clone, PartialEq)]
149+
pub enum SQLStatement {
150+
/// SELECT
151+
SQLSelect(SQLSelect),
152+
/// INSERT
153+
SQLInsert {
154+
/// TABLE
155+
table_name: String,
156+
/// COLUMNS
157+
columns: Vec<SQLIdent>,
158+
/// VALUES (vector of rows to insert)
159+
values: Vec<Vec<ASTNode>>,
160+
},
161+
SQLCopy {
162+
/// TABLE
163+
table_name: String,
164+
/// COLUMNS
165+
columns: Vec<SQLIdent>,
166+
/// VALUES a vector of values to be copied
167+
values: Vec<Option<String>>,
168+
},
169+
/// UPDATE
170+
SQLUpdate {
171+
/// TABLE
172+
table_name: String,
173+
/// Column assignments
174+
assignments: Vec<SQLAssignment>,
175+
/// WHERE
176+
selection: Option<Box<ASTNode>>,
177+
},
178+
/// DELETE
179+
SQLDelete {
180+
/// FROM
181+
relation: Option<Box<ASTNode>>,
182+
/// WHERE
183+
selection: Option<Box<ASTNode>>,
184+
},
185+
/// CREATE TABLE
186+
SQLCreateTable {
187+
/// Table name
188+
name: String,
189+
/// Optional schema
190+
columns: Vec<SQLColumnDef>,
191+
},
192+
/// ALTER TABLE
193+
SQLAlterTable {
194+
/// Table name
195+
name: String,
196+
operation: AlterOperation,
197+
},
198+
}
199+
200+
impl ToString for SQLStatement {
201+
fn to_string(&self) -> String {
202+
match self {
203+
SQLStatement::SQLSelect(s) => s.to_string(),
204+
SQLStatement::SQLInsert {
190205
table_name,
191206
columns,
192207
values,
@@ -211,7 +226,7 @@ impl ToString for ASTNode {
211226
}
212227
s
213228
}
214-
ASTNode::SQLCopy {
229+
SQLStatement::SQLCopy {
215230
table_name,
216231
columns,
217232
values,
@@ -241,7 +256,7 @@ impl ToString for ASTNode {
241256
s += "\n\\.";
242257
s
243258
}
244-
ASTNode::SQLUpdate {
259+
SQLStatement::SQLUpdate {
245260
table_name,
246261
assignments,
247262
selection,
@@ -262,7 +277,7 @@ impl ToString for ASTNode {
262277
}
263278
s
264279
}
265-
ASTNode::SQLDelete {
280+
SQLStatement::SQLDelete {
266281
relation,
267282
selection,
268283
} => {
@@ -275,7 +290,7 @@ impl ToString for ASTNode {
275290
}
276291
s
277292
}
278-
ASTNode::SQLCreateTable { name, columns } => format!(
293+
SQLStatement::SQLCreateTable { name, columns } => format!(
279294
"CREATE TABLE {} ({})",
280295
name,
281296
columns
@@ -284,7 +299,7 @@ impl ToString for ASTNode {
284299
.collect::<Vec<String>>()
285300
.join(", ")
286301
),
287-
ASTNode::SQLAlterTable { name, operation } => {
302+
SQLStatement::SQLAlterTable { name, operation } => {
288303
format!("ALTER TABLE {} {}", name, operation.to_string())
289304
}
290305
}

src/sqlparser.rs

+35-21
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,30 @@ impl Parser {
5454
}
5555

5656
/// Parse a SQL statement and produce an Abstract Syntax Tree (AST)
57-
pub fn parse_sql(dialect: &Dialect, sql: String) -> Result<ASTNode, ParserError> {
57+
pub fn parse_sql(dialect: &Dialect, sql: String) -> Result<SQLStatement, ParserError> {
5858
let mut tokenizer = Tokenizer::new(dialect, &sql);
5959
let tokens = tokenizer.tokenize()?;
6060
let mut parser = Parser::new(tokens);
61-
parser.parse()
61+
parser.parse_statement()
62+
}
63+
64+
/// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.)
65+
pub fn parse_statement(&mut self) -> Result<SQLStatement, ParserError> {
66+
match self.next_token() {
67+
Some(t) => match t {
68+
Token::SQLWord(w) => match w.keyword.as_ref() {
69+
"SELECT" => Ok(SQLStatement::SQLSelect(self.parse_select()?)),
70+
"CREATE" => Ok(self.parse_create()?),
71+
"DELETE" => Ok(self.parse_delete()?),
72+
"INSERT" => Ok(self.parse_insert()?),
73+
"ALTER" => Ok(self.parse_alter()?),
74+
"COPY" => Ok(self.parse_copy()?),
75+
unexpected => parser_err!(format!("Unexpected statement {:?}", unexpected)),
76+
},
77+
unexpected => parser_err!(format!("Unexpected token {:?}", unexpected)),
78+
},
79+
_ => parser_err!("Unexpected end of file"),
80+
}
6281
}
6382

6483
/// Parse a new expression
@@ -111,12 +130,7 @@ impl Parser {
111130
match self.next_token() {
112131
Some(t) => match t {
113132
Token::SQLWord(w) => match w.keyword.as_ref() {
114-
"SELECT" => Ok(self.parse_select()?),
115-
"CREATE" => Ok(self.parse_create()?),
116-
"DELETE" => Ok(self.parse_delete()?),
117-
"INSERT" => Ok(self.parse_insert()?),
118-
"ALTER" => Ok(self.parse_alter()?),
119-
"COPY" => Ok(self.parse_copy()?),
133+
"SELECT" => Ok(ASTNode::SQLSelect(self.parse_select()?)),
120134
"TRUE" | "FALSE" | "NULL" => {
121135
self.prev_token();
122136
self.parse_sql_value()
@@ -495,7 +509,7 @@ impl Parser {
495509
}
496510

497511
/// Parse a SQL CREATE statement
498-
pub fn parse_create(&mut self) -> Result<ASTNode, ParserError> {
512+
pub fn parse_create(&mut self) -> Result<SQLStatement, ParserError> {
499513
if self.parse_keywords(vec!["TABLE"]) {
500514
let table_name = self.parse_tablename()?;
501515
// parse optional column list (schema)
@@ -562,7 +576,7 @@ impl Parser {
562576
}
563577
}
564578
}
565-
Ok(ASTNode::SQLCreateTable {
579+
Ok(SQLStatement::SQLCreateTable {
566580
name: table_name,
567581
columns,
568582
})
@@ -608,7 +622,7 @@ impl Parser {
608622
}
609623
}
610624

611-
pub fn parse_alter(&mut self) -> Result<ASTNode, ParserError> {
625+
pub fn parse_alter(&mut self) -> Result<SQLStatement, ParserError> {
612626
self.expect_keyword("TABLE")?;
613627
let _ = self.parse_keyword("ONLY");
614628
let table_name = self.parse_tablename()?;
@@ -632,14 +646,14 @@ impl Parser {
632646
self.peek_token()
633647
));
634648
};
635-
Ok(ASTNode::SQLAlterTable {
649+
Ok(SQLStatement::SQLAlterTable {
636650
name: table_name,
637651
operation: operation?,
638652
})
639653
}
640654

641655
/// Parse a copy statement
642-
pub fn parse_copy(&mut self) -> Result<ASTNode, ParserError> {
656+
pub fn parse_copy(&mut self) -> Result<SQLStatement, ParserError> {
643657
let table_name = self.parse_tablename()?;
644658
let columns = if self.consume_token(&Token::LParen) {
645659
let column_names = self.parse_column_names()?;
@@ -652,7 +666,7 @@ impl Parser {
652666
self.expect_keyword("STDIN")?;
653667
self.expect_token(&Token::SemiColon)?;
654668
let values = self.parse_tsv()?;
655-
Ok(ASTNode::SQLCopy {
669+
Ok(SQLStatement::SQLCopy {
656670
table_name,
657671
columns,
658672
values,
@@ -1059,7 +1073,7 @@ impl Parser {
10591073
}
10601074
}
10611075

1062-
pub fn parse_delete(&mut self) -> Result<ASTNode, ParserError> {
1076+
pub fn parse_delete(&mut self) -> Result<SQLStatement, ParserError> {
10631077
let relation: Option<Box<ASTNode>> = if self.parse_keyword("FROM") {
10641078
Some(Box::new(self.parse_expr(0)?))
10651079
} else {
@@ -1081,15 +1095,15 @@ impl Parser {
10811095
next_token
10821096
))
10831097
} else {
1084-
Ok(ASTNode::SQLDelete {
1098+
Ok(SQLStatement::SQLDelete {
10851099
relation,
10861100
selection,
10871101
})
10881102
}
10891103
}
10901104

10911105
/// Parse a SELECT statement
1092-
pub fn parse_select(&mut self) -> Result<ASTNode, ParserError> {
1106+
pub fn parse_select(&mut self) -> Result<SQLSelect, ParserError> {
10931107
let projection = self.parse_expr_list()?;
10941108

10951109
let (relation, joins): (Option<Box<ASTNode>>, Vec<Join>) = if self.parse_keyword("FROM") {
@@ -1139,7 +1153,7 @@ impl Parser {
11391153
next_token
11401154
))
11411155
} else {
1142-
Ok(ASTNode::SQLSelect(SQLSelect {
1156+
Ok(SQLSelect {
11431157
projection,
11441158
selection,
11451159
relation,
@@ -1148,7 +1162,7 @@ impl Parser {
11481162
order_by,
11491163
group_by,
11501164
having,
1151-
}))
1165+
})
11521166
}
11531167
}
11541168

@@ -1287,7 +1301,7 @@ impl Parser {
12871301
}
12881302

12891303
/// Parse an INSERT statement
1290-
pub fn parse_insert(&mut self) -> Result<ASTNode, ParserError> {
1304+
pub fn parse_insert(&mut self) -> Result<SQLStatement, ParserError> {
12911305
self.expect_keyword("INTO")?;
12921306
let table_name = self.parse_tablename()?;
12931307
let columns = if self.consume_token(&Token::LParen) {
@@ -1301,7 +1315,7 @@ impl Parser {
13011315
self.expect_token(&Token::LParen)?;
13021316
let values = self.parse_expr_list()?;
13031317
self.expect_token(&Token::RParen)?;
1304-
Ok(ASTNode::SQLInsert {
1318+
Ok(SQLStatement::SQLInsert {
13051319
table_name,
13061320
columns,
13071321
values: vec![values],

tests/sqlparser_ansi.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use sqlparser::sqltokenizer::*;
99
#[test]
1010
fn parse_simple_select() {
1111
let sql = String::from("SELECT id, fname, lname FROM customer WHERE id = 1");
12-
let ast = parse_sql(&sql);
12+
let ast = parse_sql_expr(&sql);
1313
match ast {
1414
ASTNode::SQLSelect(SQLSelect { projection, .. }) => {
1515
assert_eq!(3, projection.len());
@@ -18,7 +18,7 @@ fn parse_simple_select() {
1818
}
1919
}
2020

21-
fn parse_sql(sql: &str) -> ASTNode {
21+
fn parse_sql_expr(sql: &str) -> ASTNode {
2222
let dialect = AnsiSqlDialect {};
2323
let mut tokenizer = Tokenizer::new(&dialect, &sql);
2424
let tokens = tokenizer.tokenize().unwrap();

0 commit comments

Comments
 (0)