Skip to content

Commit d6b24fb

Browse files
committed
sql-parser: support general typed string literals
Support PostgreSQL's typed string literals, as in `SELECT bool 'true'`. Previously we only supported typed string literals for the date, time, timestamp, and timestamp with time zone types, as required by the SQL standard, but PostgreSQL generalizes this syntax to all types. As a side effect, this fixes the bug where `SELECT date` parsed as a syntax error rather than as selecting a column named "date". The fix also generalizes: e.g., `SELECT bool` is now valid too. Fix #2577.
1 parent 959b9e4 commit d6b24fb

File tree

8 files changed

+161
-140
lines changed

8 files changed

+161
-140
lines changed

src/sql-parser/src/ast/mod.rs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,10 @@ pub enum Expr {
239239
right: Box<Expr>,
240240
},
241241
/// Unary operation e.g. `NOT foo`
242-
UnaryOp { op: UnaryOperator, expr: Box<Expr> },
242+
UnaryOp {
243+
op: UnaryOperator,
244+
expr: Box<Expr>,
245+
},
243246
/// CAST an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))`
244247
Cast {
245248
expr: Box<Expr>,
@@ -258,6 +261,10 @@ pub enum Expr {
258261
Nested(Box<Expr>),
259262
/// A literal value, such as string, number, interval or NULL
260263
Value(Value),
264+
TypedString {
265+
data_type: DataType,
266+
value: String,
267+
},
261268
/// Scalar function call e.g. `LEFT(foo, 5)`
262269
Function(Function),
263270
/// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
@@ -393,6 +400,12 @@ impl AstDisplay for Expr {
393400
Expr::Value(v) => {
394401
f.write_node(v);
395402
}
403+
Expr::TypedString { data_type, value } => {
404+
f.write_node(data_type);
405+
f.write_str(" '");
406+
f.write_node(&value::escape_single_quote_string(value));
407+
f.write_str("'");
408+
}
396409
Expr::Function(fun) => {
397410
f.write_node(fun);
398411
}

src/sql-parser/src/ast/value.rs

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -48,14 +48,6 @@ pub enum Value {
4848
HexStringLiteral(String),
4949
/// Boolean value true or false
5050
Boolean(bool),
51-
/// `DATE '...'` literals
52-
Date(String),
53-
/// `TIME '...'` literals
54-
Time(String),
55-
/// `TIMESTAMP '...'` literals
56-
Timestamp(String),
57-
/// `TIMESTAMP WITH TIME ZONE` literals
58-
TimestampTz(String),
5951
/// INTERVAL literals, roughly in the following format:
6052
///
6153
/// ```text
@@ -83,26 +75,6 @@ impl AstDisplay for Value {
8375
f.write_str("'");
8476
}
8577
Value::Boolean(v) => f.write_str(v),
86-
Value::Date(v) => {
87-
f.write_str("DATE '");
88-
f.write_node(&escape_single_quote_string(v));
89-
f.write_str("'");
90-
}
91-
Value::Time(v) => {
92-
f.write_str("TIME '");
93-
f.write_node(&escape_single_quote_string(v));
94-
f.write_str("'");
95-
}
96-
Value::Timestamp(v) => {
97-
f.write_str("TIMESTAMP '");
98-
f.write_node(&escape_single_quote_string(v));
99-
f.write_str("'");
100-
}
101-
Value::TimestampTz(v) => {
102-
f.write_str("TIMESTAMP WITH TIME ZONE '");
103-
f.write_node(&escape_single_quote_string(v));
104-
f.write_str("'");
105-
}
10678
Value::Interval(IntervalValue {
10779
value,
10880
precision_high: _,

src/sql-parser/src/ast/visit_macro.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,10 @@ macro_rules! make_visitor {
243243

244244
fn visit_value(&mut self, _val: &'ast $($mut)* Value) {}
245245

246+
fn visit_typed_string(&mut self, data_type: &'ast $($mut)* DataType, value: &'ast $($mut)* String) {
247+
visit_typed_string(self, data_type, value);
248+
}
249+
246250
fn visit_function(&mut self, func: &'ast $($mut)* Function) {
247251
visit_function(self, func)
248252
}
@@ -1013,6 +1017,7 @@ macro_rules! make_visitor {
10131017
Expr::Extract { field, expr } => visitor.visit_extract(field, expr),
10141018
Expr::Nested(expr) => visitor.visit_nested(expr),
10151019
Expr::Value(val) => visitor.visit_value(val),
1020+
Expr::TypedString { data_type, value } => visitor.visit_typed_string(data_type, value),
10161021
Expr::Function(func) => visitor.visit_function(func),
10171022
Expr::Case {
10181023
operand,
@@ -1173,6 +1178,10 @@ macro_rules! make_visitor {
11731178
visitor.visit_expr(expr);
11741179
}
11751180

1181+
pub fn visit_typed_string<'ast, V: $name<'ast> + ?Sized>(visitor: &mut V, data_type: &'ast $($mut)* DataType, _value: &'ast $($mut)* String) {
1182+
visitor.visit_type(data_type);
1183+
}
1184+
11761185
pub fn visit_function<'ast, V: $name<'ast> + ?Sized>(visitor: &mut V, func: &'ast $($mut)* Function) {
11771186
visitor.visit_object_name(&$($mut)* func.name);
11781187
visitor.visit_function_args(&$($mut)* func.args);

src/sql-parser/src/parser.rs

Lines changed: 78 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,14 @@ macro_rules! parser_err {
4343
};
4444
}
4545

46+
macro_rules! maybe {
47+
($e:expr) => {{
48+
if let Some(v) = $e {
49+
return Ok(v);
50+
}
51+
}};
52+
}
53+
4654
#[derive(Debug, Clone, PartialEq)]
4755
pub struct ParserError {
4856
/// Original query (so we can easily print an error)
@@ -245,6 +253,34 @@ impl Parser {
245253

246254
/// Parse an expression prefix
247255
pub fn parse_prefix(&mut self) -> Result<Expr, ParserError> {
256+
// PostgreSQL allows any string literal to be preceded by a type name,
257+
// indicating that the string literal represents a literal of that type.
258+
// Some examples:
259+
//
260+
// DATE '2020-05-20'
261+
// TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54'
262+
// BOOL 'true'
263+
//
264+
// The first two are standard SQL, while the last is a PostgreSQL
265+
// extension. Complicating matters is the fact that INTERVAL string
266+
// literals may optionally be followed by some special keywords, e.g.:
267+
//
268+
// INTERVAL '7' DAY
269+
//
270+
// Note also that naively `SELECT date` looks like a syntax error
271+
// because the `date` type name is not followed by a string literal, but
272+
// in fact is a valid expression that should parse as the column name
273+
// "date".
274+
maybe!(self.maybe_parse(|parser| {
275+
match parser.parse_data_type()? {
276+
DataType::Interval => parser.parse_literal_interval(),
277+
data_type => Ok(Expr::TypedString {
278+
data_type,
279+
value: parser.parse_literal_string()?,
280+
}),
281+
}
282+
}));
283+
248284
let tok = self
249285
.next_token()
250286
.ok_or_else(|| self.error(self.peek_prev_range(), "Unexpected EOF".to_string()))?;
@@ -257,17 +293,13 @@ impl Parser {
257293
"LIST" => self.parse_list(),
258294
"CASE" => self.parse_case_expr(),
259295
"CAST" => self.parse_cast_expr(),
260-
"DATE" => Ok(Expr::Value(self.parse_date()?)),
261296
"EXISTS" => self.parse_exists_expr(),
262297
"EXTRACT" => self.parse_extract_expr(),
263298
"INTERVAL" => self.parse_literal_interval(),
264299
"NOT" => Ok(Expr::UnaryOp {
265300
op: UnaryOperator::Not,
266301
expr: Box::new(self.parse_subexpr(Precedence::UnaryNot)?),
267302
}),
268-
"TIME" => Ok(Expr::Value(self.parse_time()?)),
269-
"TIMESTAMP" => self.parse_timestamp(),
270-
"TIMESTAMPTZ" => self.parse_timestamptz(),
271303
// Here `w` is a word, check if it's a part of a multi-part
272304
// identifier, a function call, or a simple identifier:
273305
w if keywords::RESERVED_FOR_EXPRESSIONS.contains(&w) => {
@@ -546,33 +578,6 @@ impl Parser {
546578
}
547579
}
548580

549-
fn parse_date(&mut self) -> Result<Value, ParserError> {
550-
let value = self.parse_literal_string()?;
551-
Ok(Value::Date(value))
552-
}
553-
554-
fn parse_time(&mut self) -> Result<Value, ParserError> {
555-
let value = self.parse_literal_string()?;
556-
Ok(Value::Time(value))
557-
}
558-
559-
fn parse_timestamp(&mut self) -> Result<Expr, ParserError> {
560-
if self.parse_keyword("WITH") {
561-
self.expect_keywords(&["TIME", "ZONE"])?;
562-
let value = self.parse_literal_string()?;
563-
return Ok(Expr::Value(Value::TimestampTz(value)));
564-
} else if self.parse_keyword("WITHOUT") {
565-
self.expect_keywords(&["TIME", "ZONE"])?;
566-
}
567-
let value = self.parse_literal_string()?;
568-
Ok(Expr::Value(Value::Timestamp(value)))
569-
}
570-
571-
fn parse_timestamptz(&mut self) -> Result<Expr, ParserError> {
572-
let value = self.parse_literal_string()?;
573-
Ok(Expr::Value(Value::TimestampTz(value)))
574-
}
575-
576581
/// Parse an INTERVAL literal.
577582
///
578583
/// Some syntactically valid intervals:
@@ -1103,6 +1108,20 @@ impl Parser {
11031108
Ok(values)
11041109
}
11051110

1111+
#[must_use]
1112+
fn maybe_parse<T, F>(&mut self, mut f: F) -> Option<T>
1113+
where
1114+
F: FnMut(&mut Parser) -> Result<T, ParserError>,
1115+
{
1116+
let index = self.index;
1117+
if let Ok(t) = f(self) {
1118+
Some(t)
1119+
} else {
1120+
self.index = index;
1121+
None
1122+
}
1123+
}
1124+
11061125
/// Parse a SQL CREATE statement
11071126
pub fn parse_create(&mut self) -> Result<Statement, ParserError> {
11081127
if self.parse_keyword("DATABASE") {
@@ -1895,7 +1914,7 @@ impl Parser {
18951914
}
18961915
// Interval types can be followed by a complicated interval
18971916
// qualifier that we don't currently support. See
1898-
// parse_interval_literal for a taste.
1917+
// parse_literal_interval for a taste.
18991918
"INTERVAL" => DataType::Interval,
19001919
"REGCLASS" => DataType::Regclass,
19011920
"TEXT" | "STRING" => DataType::Text,
@@ -2498,7 +2517,6 @@ impl Parser {
24982517
}
24992518

25002519
if self.consume_token(&Token::LParen) {
2501-
let index = self.index;
25022520
// A left paren introduces either a derived table (i.e., a subquery)
25032521
// or a nested join. It's nearly impossible to determine ahead of
25042522
// time which it is... so we just try to parse both.
@@ -2515,39 +2533,36 @@ impl Parser {
25152533
// | (2) starts a nested join
25162534
// (1) an additional set of parens around a nested join
25172535
//
2518-
match self.parse_derived_table_factor(NotLateral) {
2519-
// The recently consumed '(' started a derived table, and we've
2520-
// parsed the subquery, followed by the closing ')', and the
2521-
// alias of the derived table. In the example above this is
2522-
// case (3), and the next token would be `NATURAL`.
2523-
Ok(table_factor) => Ok(table_factor),
2524-
Err(_) => {
2525-
// The '(' we've recently consumed does not start a derived
2526-
// table. For valid input this can happen either when the
2527-
// token following the paren can't start a query (e.g. `foo`
2528-
// in `FROM (foo NATURAL JOIN bar)`, or when the '(' we've
2529-
// consumed is followed by another '(' that starts a
2530-
// derived table, like (3), or another nested join (2).
2531-
//
2532-
// Ignore the error and back up to where we were before.
2533-
// Either we'll be able to parse a valid nested join, or
2534-
// we won't, and we'll return that error instead.
2535-
self.index = index;
2536-
let table_and_joins = self.parse_table_and_joins()?;
2537-
match table_and_joins.relation {
2538-
TableFactor::NestedJoin { .. } => (),
2539-
_ => {
2540-
if table_and_joins.joins.is_empty() {
2541-
// The SQL spec prohibits derived tables and bare
2542-
// tables from appearing alone in parentheses.
2543-
self.expected(self.peek_range(), "joined table", self.peek_token())?
2544-
}
2545-
}
2536+
2537+
// Check if the recently consumed '(' started a derived table, in
2538+
// which case we've parsed the subquery, followed by the closing
2539+
// ')', and the alias of the derived table. In the example above
2540+
// this is case (3), and the next token would be `NATURAL`.
2541+
maybe!(self.maybe_parse(|parser| parser.parse_derived_table_factor(NotLateral)));
2542+
2543+
// The '(' we've recently consumed does not start a derived table.
2544+
// For valid input this can happen either when the token following
2545+
// the paren can't start a query (e.g. `foo` in `FROM (foo NATURAL
2546+
// JOIN bar)`), or when the '(' we've consumed is followed by another
2547+
// '(' that starts a derived table, like (3), or another nested join
2548+
// (2).
2549+
//
2550+
// `maybe_parse` ignored the error and backed up to where we were. Either
2551+
// we'll be able to parse a valid nested join, or we won't, and
2552+
// we'll return that error instead.
2553+
let table_and_joins = self.parse_table_and_joins()?;
2554+
match table_and_joins.relation {
2555+
TableFactor::NestedJoin { .. } => (),
2556+
_ => {
2557+
if table_and_joins.joins.is_empty() {
2558+
// The SQL spec prohibits derived tables and bare
2559+
// tables from appearing alone in parentheses.
2560+
self.expected(self.peek_range(), "joined table", self.peek_token())?
25462561
}
2547-
self.expect_token(&Token::RParen)?;
2548-
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
25492562
}
25502563
}
2564+
self.expect_token(&Token::RParen)?;
2565+
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
25512566
} else {
25522567
let name = self.parse_object_name()?;
25532568
// Postgres, MSSQL: table-valued functions:

0 commit comments

Comments
 (0)