Skip to content

Commit 731f400

Browse files
committed
support general typed string literal
This is a direct port of MaterializeInc/materialize#3146. It provides support for e.g. Postgres syntax where any string literal may be preceded by a type name. Fixes #168.
1 parent d32df52 commit 731f400

File tree

4 files changed

+105
-50
lines changed

4 files changed

+105
-50
lines changed

src/ast/value.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,6 @@ pub enum Value {
3030
HexStringLiteral(String),
3131
/// Boolean value true or false
3232
Boolean(bool),
33-
/// `DATE '...'` literals
34-
Date(String),
35-
/// `TIME '...'` literals
36-
Time(String),
37-
/// `TIMESTAMP '...'` literals
38-
Timestamp(String),
3933
/// INTERVAL literals, roughly in the following format:
4034
/// `INTERVAL '<value>' <leading_field> [ (<leading_precision>) ]
4135
/// [ TO <last_field> [ (<fractional_seconds_precision>) ] ]`,
@@ -67,9 +61,6 @@ impl fmt::Display for Value {
6761
Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
6862
Value::HexStringLiteral(v) => write!(f, "X'{}'", v),
6963
Value::Boolean(v) => write!(f, "{}", v),
70-
Value::Date(v) => write!(f, "DATE '{}'", escape_single_quote_string(v)),
71-
Value::Time(v) => write!(f, "TIME '{}'", escape_single_quote_string(v)),
72-
Value::Timestamp(v) => write!(f, "TIMESTAMP '{}'", escape_single_quote_string(v)),
7364
Value::Interval {
7465
value,
7566
leading_field: DateTimeField::Second,

src/parser.rs

Lines changed: 63 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,14 @@ macro_rules! parser_err {
3434
};
3535
}
3636

37+
macro_rules! maybe {
38+
($e:expr) => {{
39+
if let Some(v) = $e {
40+
return Ok(v);
41+
}
42+
}};
43+
}
44+
3745
#[derive(PartialEq)]
3846
pub enum IsOptional {
3947
Optional,
@@ -176,6 +184,34 @@ impl Parser {
176184

177185
/// Parse an expression prefix
178186
pub fn parse_prefix(&mut self) -> Result<Expr, ParserError> {
187+
// PostgreSQL allows any string literal to be preceded by a type name, indicating that the
188+
// string literal represents a literal of that type. Some examples:
189+
//
190+
// DATE '2020-05-20'
191+
// TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54'
192+
// BOOL 'true'
193+
//
194+
// The first two are standard SQL, while the latter is a PostgreSQL extension. Complicating
195+
// matters is the fact that INTERVAL string literals may optionally be followed by special
196+
// keywords, e.g.:
197+
//
198+
// INTERVAL '7' DAY
199+
//
200+
// Note also that naively `SELECT date` looks like a syntax error because the `date` type
201+
// name is not followed by a string literal, but in fact is a valid expression that should
202+
// parse as the column name "date".
203+
maybe!(self.maybe_parse(|parser| {
204+
match parser.parse_data_type()? {
205+
DataType::Interval => parser.parse_literal_interval(),
206+
data_type => Ok(Expr::Cast {
207+
expr: Box::new(Expr::Value(Value::SingleQuotedString(
208+
parser.parse_literal_string()?,
209+
))),
210+
data_type,
211+
}),
212+
}
213+
}));
214+
179215
let tok = self
180216
.next_token()
181217
.ok_or_else(|| ParserError::ParserError("Unexpected EOF".to_string()))?;
@@ -187,7 +223,6 @@ impl Parser {
187223
}
188224
"CASE" => self.parse_case_expr(),
189225
"CAST" => self.parse_cast_expr(),
190-
"DATE" => Ok(Expr::Value(Value::Date(self.parse_literal_string()?))),
191226
"EXISTS" => self.parse_exists_expr(),
192227
"EXTRACT" => self.parse_extract_expr(),
193228
"INTERVAL" => self.parse_literal_interval(),
@@ -196,8 +231,6 @@ impl Parser {
196231
op: UnaryOperator::Not,
197232
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
198233
}),
199-
"TIME" => Ok(Expr::Value(Value::Time(self.parse_literal_string()?))),
200-
"TIMESTAMP" => Ok(Expr::Value(Value::Timestamp(self.parse_literal_string()?))),
201234
// Here `w` is a word, check if it's a part of a multi-part
202235
// identifier, a function call, or a simple identifier:
203236
_ => match self.peek_token() {
@@ -912,6 +945,20 @@ impl Parser {
912945
Ok(values)
913946
}
914947

948+
#[must_use]
949+
fn maybe_parse<T, F>(&mut self, mut f: F) -> Option<T>
950+
where
951+
F: FnMut(&mut Parser) -> Result<T, ParserError>,
952+
{
953+
let index = self.index;
954+
if let Ok(t) = f(self) {
955+
Some(t)
956+
} else {
957+
self.index = index;
958+
None
959+
}
960+
}
961+
915962
/// Parse either `ALL` or `DISTINCT`. Returns `true` if `DISTINCT` is parsed and results in a
916963
/// `ParserError` if both `ALL` and `DISTINCT` are found.
917964
pub fn parse_all_or_distinct(&mut self) -> Result<bool, ParserError> {
@@ -1908,7 +1955,6 @@ impl Parser {
19081955
}
19091956

19101957
if self.consume_token(&Token::LParen) {
1911-
let index = self.index;
19121958
// A left paren introduces either a derived table (i.e., a subquery)
19131959
// or a nested join. It's nearly impossible to determine ahead of
19141960
// time which it is... so we just try to parse both.
@@ -1925,30 +1971,20 @@ impl Parser {
19251971
// | (2) starts a nested join
19261972
// (1) an additional set of parens around a nested join
19271973
//
1928-
match self.parse_derived_table_factor(NotLateral) {
1929-
// The recently consumed '(' started a derived table, and we've
1930-
// parsed the subquery, followed by the closing ')', and the
1931-
// alias of the derived table. In the example above this is
1932-
// case (3), and the next token would be `NATURAL`.
1933-
Ok(table_factor) => Ok(table_factor),
1934-
Err(_) => {
1935-
// A parsing error from `parse_derived_table_factor` indicates that
1936-
// the '(' we've recently consumed does not start a derived table
1937-
// (cases 1, 2, or 4). Ignore the error and back up to where we
1938-
// were before - right after the opening '('.
1939-
self.index = index;
1940-
1941-
// Inside the parentheses we expect to find a table factor
1942-
// followed by some joins or another level of nesting.
1943-
let table_and_joins = self.parse_table_and_joins()?;
1944-
self.expect_token(&Token::RParen)?;
1945-
// The SQL spec prohibits derived and bare tables from appearing
1946-
// alone in parentheses. We don't enforce this as some databases
1947-
// (e.g. Snowflake) allow such syntax.
19481974

1949-
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
1950-
}
1951-
}
1975+
// Check if the recently consumed '(' started a derived table, in which case we've
1976+
// parsed the subquery, followed by the closing ')', and the alias of the derived
1977+
// table. In the example above this is case (3), or another nested join (2).
1978+
maybe!(self.maybe_parse(|parser| parser.parse_derived_table_factor(NotLateral)));
1979+
1980+
// Inside the parentheses we expect to find a table factor
1981+
// followed by some joins or another level of nesting.
1982+
let table_and_joins = self.parse_table_and_joins()?;
1983+
self.expect_token(&Token::RParen)?;
1984+
// The SQL spec prohibits derived and bare tables from appearing
1985+
// alone in parentheses. We don't enforce this as some databases
1986+
// (e.g. Snowflake) allow such syntax.
1987+
Ok(TableFactor::NestedJoin(Box::new(table_and_joins)))
19521988
} else {
19531989
let name = self.parse_object_name()?;
19541990
// Postgres, MSSQL: table-valued functions:

tests/sqlparser_common.rs

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,19 @@ fn parse_null_in_select() {
412412
);
413413
}
414414

415+
#[test]
416+
fn parse_select_with_date_column_name() {
417+
let sql = "SELECT date";
418+
let select = verified_only_select(sql);
419+
assert_eq!(
420+
&Expr::Identifier(Ident {
421+
value: "date".into(),
422+
quote_style: None
423+
}),
424+
expr_from_projection(only(&select.projection)),
425+
);
426+
}
427+
415428
#[test]
416429
fn parse_escaped_single_quote_string_predicate() {
417430
use self::BinaryOperator::*;
@@ -554,16 +567,19 @@ fn parse_not_precedence() {
554567
);
555568

556569
// NOT has lower precedence than LIKE, so the following parses as NOT ('a' NOT LIKE 'b')
557-
let sql = "NOT 'a' NOT LIKE 'b'";
570+
let sql = "CAST('a' AS NOT) NOT LIKE 'b'";
558571
assert_eq!(
559572
verified_expr(sql),
560-
Expr::UnaryOp {
561-
op: UnaryOperator::Not,
562-
expr: Box::new(Expr::BinaryOp {
563-
left: Box::new(Expr::Value(Value::SingleQuotedString("a".into()))),
564-
op: BinaryOperator::NotLike,
565-
right: Box::new(Expr::Value(Value::SingleQuotedString("b".into()))),
573+
Expr::BinaryOp {
574+
left: Box::new(Expr::Cast {
575+
expr: Box::new(Expr::Value(Value::SingleQuotedString("a".into()))),
576+
data_type: DataType::Custom(ObjectName(vec![Ident {
577+
value: "NOT".into(),
578+
quote_style: None
579+
}]))
566580
}),
581+
op: BinaryOperator::NotLike,
582+
right: Box::new(Expr::Value(Value::SingleQuotedString("b".into())))
567583
},
568584
);
569585

@@ -1423,30 +1439,41 @@ fn parse_literal_string() {
14231439

14241440
#[test]
14251441
fn parse_literal_date() {
1426-
let sql = "SELECT DATE '1999-01-01'";
1442+
let sql = "SELECT CAST('1999-01-01' AS date)";
14271443
let select = verified_only_select(sql);
14281444
assert_eq!(
1429-
&Expr::Value(Value::Date("1999-01-01".into())),
1445+
&Expr::Cast {
1446+
expr: Box::new(Expr::Value(Value::SingleQuotedString("1999-01-01".into()))),
1447+
data_type: DataType::Date
1448+
},
14301449
expr_from_projection(only(&select.projection)),
14311450
);
14321451
}
14331452

14341453
#[test]
14351454
fn parse_literal_time() {
1436-
let sql = "SELECT TIME '01:23:34'";
1455+
let sql = "SELECT CAST('01:23:34' AS time)";
14371456
let select = verified_only_select(sql);
14381457
assert_eq!(
1439-
&Expr::Value(Value::Time("01:23:34".into())),
1458+
&Expr::Cast {
1459+
expr: Box::new(Expr::Value(Value::SingleQuotedString("01:23:34".into()))),
1460+
data_type: DataType::Time
1461+
},
14401462
expr_from_projection(only(&select.projection)),
14411463
);
14421464
}
14431465

14441466
#[test]
14451467
fn parse_literal_timestamp() {
1446-
let sql = "SELECT TIMESTAMP '1999-01-01 01:23:34'";
1468+
let sql = "SELECT CAST('1999-01-01 01:23:34' AS timestamp)";
14471469
let select = verified_only_select(sql);
14481470
assert_eq!(
1449-
&Expr::Value(Value::Timestamp("1999-01-01 01:23:34".into())),
1471+
&Expr::Cast {
1472+
expr: Box::new(Expr::Value(Value::SingleQuotedString(
1473+
"1999-01-01 01:23:34".into()
1474+
))),
1475+
data_type: DataType::Timestamp
1476+
},
14501477
expr_from_projection(only(&select.projection)),
14511478
);
14521479
}

tests/sqlparser_mssql.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ fn parse_mssql_identifiers() {
4141

4242
#[test]
4343
fn parse_mssql_single_quoted_aliases() {
44-
let _ = ms_and_generic().one_statement_parses_to("SELECT foo 'alias'", "SELECT foo AS 'alias'");
44+
let _ = ms_and_generic()
45+
.one_statement_parses_to("SELECT foo 'alias'", "SELECT CAST('alias' AS foo)");
4546
}
4647

4748
#[test]

0 commit comments

Comments
 (0)