Skip to content

Commit 51cb5c7

Browse files
committed
support general typed string literal
This is a direct port of MaterializeInc/materialize#3146. It provides support for e.g. Postgres syntax where any string literal may be preceded by a type name. Fixes #168.
1 parent 0fe3a8e commit 51cb5c7

File tree

4 files changed

+110
-42
lines changed

4 files changed

+110
-42
lines changed

src/ast/mod.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,10 @@ pub enum Expr {
210210
Nested(Box<Expr>),
211211
/// A literal value, such as string, number, date or NULL
212212
Value(Value),
213+
/// A constant of form `<data_type> 'value'`.
214+
/// This can represent ANSI SQL `DATE`, `TIME`, and `TIMESTAMP` literals (such as `DATE '2020-01-01'`),
215+
/// as well as constants of other types (a non-standard PostgreSQL extension).
216+
TypedString { data_type: DataType, value: String },
213217
/// Scalar function call e.g. `LEFT(foo, 5)`
214218
Function(Function),
215219
/// `CASE [<operand>] WHEN <condition> THEN <result> ... [ELSE <result>] END`
@@ -284,6 +288,10 @@ impl fmt::Display for Expr {
284288
Expr::Collate { expr, collation } => write!(f, "{} COLLATE {}", expr, collation),
285289
Expr::Nested(ast) => write!(f, "({})", ast),
286290
Expr::Value(v) => write!(f, "{}", v),
291+
Expr::TypedString { data_type, value } => {
292+
write!(f, "{}", data_type)?;
293+
write!(f, " '{}'", &value::escape_single_quote_string(value))
294+
}
287295
Expr::Function(fun) => write!(f, "{}", fun),
288296
Expr::Case {
289297
operand,

src/ast/value.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,6 @@ pub enum Value {
3333
HexStringLiteral(String),
3434
/// Boolean value true or false
3535
Boolean(bool),
36-
/// `DATE '...'` literals
37-
Date(String),
38-
/// `TIME '...'` literals
39-
Time(String),
40-
/// `TIMESTAMP '...'` literals
41-
Timestamp(String),
4236
/// INTERVAL literals, roughly in the following format:
4337
/// `INTERVAL '<value>' [ <leading_field> [ (<leading_precision>) ] ]
4438
/// [ TO <last_field> [ (<fractional_seconds_precision>) ] ]`,
@@ -70,9 +64,6 @@ impl fmt::Display for Value {
7064
Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
7165
Value::HexStringLiteral(v) => write!(f, "X'{}'", v),
7266
Value::Boolean(v) => write!(f, "{}", v),
73-
Value::Date(v) => write!(f, "DATE '{}'", escape_single_quote_string(v)),
74-
Value::Time(v) => write!(f, "TIME '{}'", escape_single_quote_string(v)),
75-
Value::Timestamp(v) => write!(f, "TIMESTAMP '{}'", escape_single_quote_string(v)),
7667
Value::Interval {
7768
value,
7869
leading_field: Some(DateTimeField::Second),

src/parser.rs

Lines changed: 74 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,15 @@ macro_rules! parser_err {
3434
};
3535
}
3636

37+
// Returns early from the enclosing function with `Ok(v)` if the optional expression is `Some(v)`
38+
macro_rules! return_ok_if_some {
39+
($e:expr) => {{
40+
if let Some(v) = $e {
41+
return Ok(v);
42+
}
43+
}};
44+
}
45+
3746
#[derive(PartialEq)]
3847
pub enum IsOptional {
3948
Optional,
@@ -171,6 +180,38 @@ impl Parser {
171180

172181
/// Parse an expression prefix
173182
pub fn parse_prefix(&mut self) -> Result<Expr, ParserError> {
183+
// PostgreSQL allows any string literal to be preceded by a type name, indicating that the
184+
// string literal represents a literal of that type. Some examples:
185+
//
186+
// DATE '2020-05-20'
187+
// TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54'
188+
// BOOL 'true'
189+
//
190+
// The first two are standard SQL, while the last is a PostgreSQL extension. Complicating
191+
// matters is the fact that INTERVAL string literals may optionally be followed by special
192+
// keywords, e.g.:
193+
//
194+
// INTERVAL '7' DAY
195+
//
196+
// Note also that naively `SELECT date` looks like a syntax error because the `date` type
197+
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
198+
// expression that should parse as the column name "date".
199+
return_ok_if_some!(self.maybe_parse(|parser| {
200+
match parser.parse_data_type()? {
201+
DataType::Interval => parser.parse_literal_interval(),
202+
// Single-quoted strings are parsed as custom data types, however this is not desirable
203+
// when we are handling input like `"NOT 'a' NOT LIKE 'b'"` because this will produce a
204+
// TypedString instead of a SingleQuotedString. Further, this leads to issues where the
205+
// same input will yield a BinaryOperator instead of the correct UnaryOperator. Here we
206+
// handle that specific case by returning an error.
207+
DataType::Custom(..) => parser_err!("dummy"),
208+
data_type => Ok(Expr::TypedString {
209+
data_type,
210+
value: parser.parse_literal_string()?,
211+
}),
212+
}
213+
}));
214+
174215
let expr = match self.next_token() {
175216
Token::Word(w) => match w.keyword.as_ref() {
176217
"TRUE" | "FALSE" | "NULL" => {
@@ -179,7 +220,6 @@ impl Parser {
179220
}
180221
"CASE" => self.parse_case_expr(),
181222
"CAST" => self.parse_cast_expr(),
182-
"DATE" => Ok(Expr::Value(Value::Date(self.parse_literal_string()?))),
183223
"EXISTS" => self.parse_exists_expr(),
184224
"EXTRACT" => self.parse_extract_expr(),
185225
"INTERVAL" => self.parse_literal_interval(),
@@ -188,8 +228,6 @@ impl Parser {
188228
op: UnaryOperator::Not,
189229
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
190230
}),
191-
"TIME" => Ok(Expr::Value(Value::Time(self.parse_literal_string()?))),
192-
"TIMESTAMP" => Ok(Expr::Value(Value::Timestamp(self.parse_literal_string()?))),
193231
// Here `w` is a word, check if it's a part of a multi-part
194232
// identifier, a function call, or a simple identifier:
195233
_ => match self.peek_token() {
@@ -895,6 +933,20 @@ impl Parser {
895933
Ok(values)
896934
}
897935

936+
#[must_use]
937+
fn maybe_parse<T, F>(&mut self, mut f: F) -> Option<T>
938+
where
939+
F: FnMut(&mut Parser) -> Result<T, ParserError>,
940+
{
941+
let index = self.index;
942+
if let Ok(t) = f(self) {
943+
Some(t)
944+
} else {
945+
self.index = index;
946+
None
947+
}
948+
}
949+
898950
/// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a
899951
/// `ParserError` if both `ALL` and `DISTINCT` are found.
900952
pub fn parse_all_or_distinct(&mut self) -> Result<bool, ParserError> {
@@ -1877,7 +1929,6 @@ impl Parser {
18771929
}
18781930

18791931
if self.consume_token(&Token::LParen) {
1880-
let index = self.index;
18811932
// A left paren introduces either a derived table (i.e., a subquery)
18821933
// or a nested join. It's nearly impossible to determine ahead of
18831934
// time which it is... so we just try to parse both.
@@ -1894,30 +1945,26 @@ impl Parser {
18941945
// | (2) starts a nested join
18951946
// (1) an additional set of parens around a nested join
18961947
//
1897-
match self.parse_derived_table_factor(NotLateral) {
1898-
// The recently consumed '(' started a derived table, and we've
1899-
// parsed the subquery, followed by the closing ')', and the
1900-
// alias of the derived table. In the example above this is
1901-
// case (3), and the next token would be `NATURAL`.
1902-
Ok(table_factor) => Ok(table_factor),
1903-
Err(_) => {
1904-
// A parsing error from `parse_derived_table_factor` indicates that
1905-
// the '(' we've recently consumed does not start a derived table
1906-
// (cases 1, 2, or 4). Ignore the error and back up to where we
1907-
// were before - right after the opening '('.
1908-
self.index = index;
1909-
1910-
// Inside the parentheses we expect to find a table factor
1911-
// followed by some joins or another level of nesting.
1912-
let table_and_joins = self.parse_table_and_joins()?;
1913-
self.expect_token(&Token::RParen)?;
1914-
// The SQL spec prohibits derived and bare tables from appearing
1915-
// alone in parentheses. We don't enforce this as some databases
1916-
// (e.g. Snowflake) allow such syntax.
19171948

1918-
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
1919-
}
1920-
}
1949+
// Check if the recently consumed '(' started a derived table, in which case we've
1950+
// parsed the subquery, followed by the closing ')', and the alias of the derived
1951+
// table. In the example above this is case (3).
1952+
//
1953+
// A parsing error from `parse_derived_table_factor` indicates that the '(' we've
1954+
// recently consumed does not start a derived table (cases 1, 2, or 4). Ignore the
1955+
// error and back up to where we were, right after the opening '('.
1956+
return_ok_if_some!(
1957+
self.maybe_parse(|parser| parser.parse_derived_table_factor(NotLateral))
1958+
);
1959+
1960+
// Inside the parentheses we expect to find a table factor
1961+
// followed by some joins or another level of nesting.
1962+
let table_and_joins = self.parse_table_and_joins()?;
1963+
self.expect_token(&Token::RParen)?;
1964+
// The SQL spec prohibits derived and bare tables from appearing
1965+
// alone in parentheses. We don't enforce this as some databases
1966+
// (e.g. Snowflake) allow such syntax.
1967+
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
19211968
} else {
19221969
let name = self.parse_object_name()?;
19231970
// Postgres, MSSQL: table-valued functions:

tests/sqlparser_common.rs

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -413,6 +413,19 @@ fn parse_null_in_select() {
413413
);
414414
}
415415

416+
#[test]
417+
fn parse_select_with_date_column_name() {
418+
let sql = "SELECT date";
419+
let select = verified_only_select(sql);
420+
assert_eq!(
421+
&Expr::Identifier(Ident {
422+
value: "date".into(),
423+
quote_style: None
424+
}),
425+
expr_from_projection(only(&select.projection)),
426+
);
427+
}
428+
416429
#[test]
417430
fn parse_escaped_single_quote_string_predicate() {
418431
use self::BinaryOperator::*;
@@ -1424,30 +1437,39 @@ fn parse_literal_string() {
14241437

14251438
#[test]
14261439
fn parse_literal_date() {
1427-
let sql = "SELECT DATE '1999-01-01'";
1440+
let sql = "SELECT date '1999-01-01'";
14281441
let select = verified_only_select(sql);
14291442
assert_eq!(
1430-
&Expr::Value(Value::Date("1999-01-01".into())),
1443+
&Expr::TypedString {
1444+
data_type: DataType::Date,
1445+
value: "1999-01-01".into()
1446+
},
14311447
expr_from_projection(only(&select.projection)),
14321448
);
14331449
}
14341450

14351451
#[test]
14361452
fn parse_literal_time() {
1437-
let sql = "SELECT TIME '01:23:34'";
1453+
let sql = "SELECT time '01:23:34'";
14381454
let select = verified_only_select(sql);
14391455
assert_eq!(
1440-
&Expr::Value(Value::Time("01:23:34".into())),
1456+
&Expr::TypedString {
1457+
data_type: DataType::Time,
1458+
value: "01:23:34".into()
1459+
},
14411460
expr_from_projection(only(&select.projection)),
14421461
);
14431462
}
14441463

14451464
#[test]
14461465
fn parse_literal_timestamp() {
1447-
let sql = "SELECT TIMESTAMP '1999-01-01 01:23:34'";
1466+
let sql = "SELECT timestamp '1999-01-01 01:23:34'";
14481467
let select = verified_only_select(sql);
14491468
assert_eq!(
1450-
&Expr::Value(Value::Timestamp("1999-01-01 01:23:34".into())),
1469+
&Expr::TypedString {
1470+
data_type: DataType::Timestamp,
1471+
value: "1999-01-01 01:23:34".into()
1472+
},
14511473
expr_from_projection(only(&select.projection)),
14521474
);
14531475
}

0 commit comments

Comments
 (0)