Skip to content

Commit aa714e3

Browse files
authored
Fix INTERVAL parsing to support expressions and units via dialect (#1398)
1 parent 1bed87a commit aa714e3

File tree

10 files changed

+331
-134
lines changed

10 files changed

+331
-134
lines changed

src/dialect/ansi.rs

+4
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,8 @@ impl Dialect for AnsiDialect {
2424
fn is_identifier_part(&self, ch: char) -> bool {
2525
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_'
2626
}
27+
28+
fn require_interval_qualifier(&self) -> bool {
29+
true
30+
}
2731
}

src/dialect/bigquery.rs

+4
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,8 @@ impl Dialect for BigQueryDialect {
6363
fn supports_select_wildcard_except(&self) -> bool {
6464
true
6565
}
66+
67+
fn require_interval_qualifier(&self) -> bool {
68+
true
69+
}
6670
}

src/dialect/clickhouse.rs

+4
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,8 @@ impl Dialect for ClickHouseDialect {
3737
fn describe_requires_table_keyword(&self) -> bool {
3838
true
3939
}
40+
41+
fn require_interval_qualifier(&self) -> bool {
42+
true
43+
}
4044
}

src/dialect/databricks.rs

+4
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,8 @@ impl Dialect for DatabricksDialect {
3838
fn supports_select_wildcard_except(&self) -> bool {
3939
true
4040
}
41+
42+
fn require_interval_qualifier(&self) -> bool {
43+
true
44+
}
4145
}

src/dialect/hive.rs

+4
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,8 @@ impl Dialect for HiveDialect {
4242
fn supports_numeric_prefix(&self) -> bool {
4343
true
4444
}
45+
46+
fn require_interval_qualifier(&self) -> bool {
47+
true
48+
}
4549
}

src/dialect/mod.rs

+21
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,27 @@ pub trait Dialect: Debug + Any {
515515
fn supports_create_index_with_clause(&self) -> bool {
516516
false
517517
}
518+
519+
/// Whether `INTERVAL` expressions require units (called "qualifiers" in the ANSI SQL spec) to be specified,
520+
/// e.g. `INTERVAL 1 DAY` vs `INTERVAL 1`.
521+
///
522+
/// Expressions within intervals (e.g. `INTERVAL '1' + '1' DAY`) are only allowed when units are required.
523+
///
524+
/// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1398> for more information.
525+
///
526+
/// When `true`:
527+
/// * `INTERVAL '1' DAY` is VALID
528+
/// * `INTERVAL 1 + 1 DAY` is VALID
529+
/// * `INTERVAL '1' + '1' DAY` is VALID
530+
/// * `INTERVAL '1'` is INVALID
531+
///
532+
/// When `false`:
533+
/// * `INTERVAL '1'` is VALID
534+
/// * `INTERVAL '1' DAY` is VALID — unit is not required, but still allowed
535+
/// * `INTERVAL 1 + 1 DAY` is INVALID
536+
fn require_interval_qualifier(&self) -> bool {
537+
false
538+
}
518539
}
519540

520541
/// This represents the operators for which precedence must be defined

src/dialect/mysql.rs

+4
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ impl Dialect for MySqlDialect {
8484
None
8585
}
8686
}
87+
88+
fn require_interval_qualifier(&self) -> bool {
89+
true
90+
}
8791
}
8892

8993
/// `LOCK TABLES`

src/parser/mod.rs

+65-80
Original file line numberDiff line numberDiff line change
@@ -56,15 +56,6 @@ macro_rules! parser_err {
5656
};
5757
}
5858

59-
// Returns a successful result if the optional expression is some
60-
macro_rules! return_ok_if_some {
61-
($e:expr) => {{
62-
if let Some(v) = $e {
63-
return Ok(v);
64-
}
65-
}};
66-
}
67-
6859
#[cfg(feature = "std")]
6960
/// Implementation [`RecursionCounter`] if std is available
7061
mod recursion {
@@ -928,35 +919,6 @@ impl<'a> Parser<'a> {
928919
Ok(expr)
929920
}
930921

931-
pub fn parse_interval_expr(&mut self) -> Result<Expr, ParserError> {
932-
let precedence = self.dialect.prec_unknown();
933-
let mut expr = self.parse_prefix()?;
934-
935-
loop {
936-
let next_precedence = self.get_next_interval_precedence()?;
937-
938-
if precedence >= next_precedence {
939-
break;
940-
}
941-
942-
expr = self.parse_infix(expr, next_precedence)?;
943-
}
944-
945-
Ok(expr)
946-
}
947-
948-
/// Get the precedence of the next token, with AND, OR, and XOR.
949-
pub fn get_next_interval_precedence(&self) -> Result<u8, ParserError> {
950-
let token = self.peek_token();
951-
952-
match token.token {
953-
Token::Word(w) if w.keyword == Keyword::AND => Ok(self.dialect.prec_unknown()),
954-
Token::Word(w) if w.keyword == Keyword::OR => Ok(self.dialect.prec_unknown()),
955-
Token::Word(w) if w.keyword == Keyword::XOR => Ok(self.dialect.prec_unknown()),
956-
_ => self.get_next_precedence(),
957-
}
958-
}
959-
960922
pub fn parse_assert(&mut self) -> Result<Statement, ParserError> {
961923
let condition = self.parse_expr()?;
962924
let message = if self.parse_keyword(Keyword::AS) {
@@ -1004,7 +966,7 @@ impl<'a> Parser<'a> {
1004966
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
1005967
// expression that should parse as the column name "date".
1006968
let loc = self.peek_token().location;
1007-
return_ok_if_some!(self.maybe_parse(|parser| {
969+
let opt_expr = self.maybe_parse(|parser| {
1008970
match parser.parse_data_type()? {
1009971
DataType::Interval => parser.parse_interval(),
1010972
// PostgreSQL allows almost any identifier to be used as custom data type name,
@@ -1020,7 +982,11 @@ impl<'a> Parser<'a> {
1020982
value: parser.parse_literal_string()?,
1021983
}),
1022984
}
1023-
}));
985+
});
986+
987+
if let Some(expr) = opt_expr {
988+
return Ok(expr);
989+
}
1024990

1025991
let next_token = self.next_token();
1026992
let expr = match next_token.token {
@@ -2110,52 +2076,32 @@ impl<'a> Parser<'a> {
21102076
// don't currently try to parse it. (The sign can instead be included
21112077
// inside the value string.)
21122078

2113-
// The first token in an interval is a string literal which specifies
2114-
// the duration of the interval.
2115-
let value = self.parse_interval_expr()?;
2079+
// to match the different flavours of INTERVAL syntax, we only allow expressions
2080+
// if the dialect requires an interval qualifier,
2081+
// see https://github.com/sqlparser-rs/sqlparser-rs/pull/1398 for more details
2082+
let value = if self.dialect.require_interval_qualifier() {
2083+
// parse a whole expression so `INTERVAL 1 + 1 DAY` is valid
2084+
self.parse_expr()?
2085+
} else {
2086+
// parse a prefix expression so `INTERVAL 1 DAY` is valid, but `INTERVAL 1 + 1 DAY` is not
2087+
// this also means that `INTERVAL '5 days' > INTERVAL '1 day'` treated properly
2088+
self.parse_prefix()?
2089+
};
21162090

21172091
// Following the string literal is a qualifier which indicates the units
21182092
// of the duration specified in the string literal.
21192093
//
21202094
// Note that PostgreSQL allows omitting the qualifier, so we provide
21212095
// this more general implementation.
2122-
let leading_field = match self.peek_token().token {
2123-
Token::Word(kw)
2124-
if [
2125-
Keyword::YEAR,
2126-
Keyword::MONTH,
2127-
Keyword::WEEK,
2128-
Keyword::DAY,
2129-
Keyword::HOUR,
2130-
Keyword::MINUTE,
2131-
Keyword::SECOND,
2132-
Keyword::CENTURY,
2133-
Keyword::DECADE,
2134-
Keyword::DOW,
2135-
Keyword::DOY,
2136-
Keyword::EPOCH,
2137-
Keyword::ISODOW,
2138-
Keyword::ISOYEAR,
2139-
Keyword::JULIAN,
2140-
Keyword::MICROSECOND,
2141-
Keyword::MICROSECONDS,
2142-
Keyword::MILLENIUM,
2143-
Keyword::MILLENNIUM,
2144-
Keyword::MILLISECOND,
2145-
Keyword::MILLISECONDS,
2146-
Keyword::NANOSECOND,
2147-
Keyword::NANOSECONDS,
2148-
Keyword::QUARTER,
2149-
Keyword::TIMEZONE,
2150-
Keyword::TIMEZONE_HOUR,
2151-
Keyword::TIMEZONE_MINUTE,
2152-
]
2153-
.iter()
2154-
.any(|d| kw.keyword == *d) =>
2155-
{
2156-
Some(self.parse_date_time_field()?)
2157-
}
2158-
_ => None,
2096+
let leading_field = if self.next_token_is_temporal_unit() {
2097+
Some(self.parse_date_time_field()?)
2098+
} else if self.dialect.require_interval_qualifier() {
2099+
return parser_err!(
2100+
"INTERVAL requires a unit after the literal value",
2101+
self.peek_token().location
2102+
);
2103+
} else {
2104+
None
21592105
};
21602106

21612107
let (leading_precision, last_field, fsec_precision) =
@@ -2192,6 +2138,45 @@ impl<'a> Parser<'a> {
21922138
}))
21932139
}
21942140

2141+
/// Peek at the next token and determine if it is a temporal unit
2142+
/// like `second`.
2143+
pub fn next_token_is_temporal_unit(&mut self) -> bool {
2144+
if let Token::Word(word) = self.peek_token().token {
2145+
matches!(
2146+
word.keyword,
2147+
Keyword::YEAR
2148+
| Keyword::MONTH
2149+
| Keyword::WEEK
2150+
| Keyword::DAY
2151+
| Keyword::HOUR
2152+
| Keyword::MINUTE
2153+
| Keyword::SECOND
2154+
| Keyword::CENTURY
2155+
| Keyword::DECADE
2156+
| Keyword::DOW
2157+
| Keyword::DOY
2158+
| Keyword::EPOCH
2159+
| Keyword::ISODOW
2160+
| Keyword::ISOYEAR
2161+
| Keyword::JULIAN
2162+
| Keyword::MICROSECOND
2163+
| Keyword::MICROSECONDS
2164+
| Keyword::MILLENIUM
2165+
| Keyword::MILLENNIUM
2166+
| Keyword::MILLISECOND
2167+
| Keyword::MILLISECONDS
2168+
| Keyword::NANOSECOND
2169+
| Keyword::NANOSECONDS
2170+
| Keyword::QUARTER
2171+
| Keyword::TIMEZONE
2172+
| Keyword::TIMEZONE_HOUR
2173+
| Keyword::TIMEZONE_MINUTE
2174+
)
2175+
} else {
2176+
false
2177+
}
2178+
}
2179+
21952180
/// Bigquery specific: Parse a struct literal
21962181
/// Syntax
21972182
/// ```sql

tests/sqlparser_bigquery.rs

+8-13
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
#[macro_use]
1414
mod test_utils;
1515

16-
use sqlparser::ast;
1716
use std::ops::Deref;
1817

1918
use sqlparser::ast::*;
@@ -830,16 +829,14 @@ fn parse_typed_struct_syntax_bigquery() {
830829
expr_from_projection(&select.projection[3])
831830
);
832831

833-
let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '1-2 3 4:5:6.789999'), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
832+
let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '2' HOUR), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
834833
let select = bigquery().verified_only_select(sql);
835834
assert_eq!(2, select.projection.len());
836835
assert_eq!(
837836
&Expr::Struct {
838-
values: vec![Expr::Interval(ast::Interval {
839-
value: Box::new(Expr::Value(Value::SingleQuotedString(
840-
"1-2 3 4:5:6.789999".to_string()
841-
))),
842-
leading_field: None,
837+
values: vec![Expr::Interval(Interval {
838+
value: Box::new(Expr::Value(Value::SingleQuotedString("2".to_string()))),
839+
leading_field: Some(DateTimeField::Hour),
843840
leading_precision: None,
844841
last_field: None,
845842
fractional_seconds_precision: None
@@ -1141,16 +1138,14 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
11411138
expr_from_projection(&select.projection[3])
11421139
);
11431140

1144-
let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '1-2 3 4:5:6.789999'), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
1141+
let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '1' MONTH), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
11451142
let select = bigquery_and_generic().verified_only_select(sql);
11461143
assert_eq!(2, select.projection.len());
11471144
assert_eq!(
11481145
&Expr::Struct {
1149-
values: vec![Expr::Interval(ast::Interval {
1150-
value: Box::new(Expr::Value(Value::SingleQuotedString(
1151-
"1-2 3 4:5:6.789999".to_string()
1152-
))),
1153-
leading_field: None,
1146+
values: vec![Expr::Interval(Interval {
1147+
value: Box::new(Expr::Value(Value::SingleQuotedString("1".to_string()))),
1148+
leading_field: Some(DateTimeField::Month),
11541149
leading_precision: None,
11551150
last_field: None,
11561151
fractional_seconds_precision: None

0 commit comments

Comments
 (0)