Skip to content

Fix INTERVAL parsing to support expressions and units via dialect #1398

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/dialect/ansi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,8 @@ impl Dialect for AnsiDialect {
fn is_identifier_part(&self, ch: char) -> bool {
ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch.is_ascii_digit() || ch == '_'
}

fn require_interval_qualifier(&self) -> bool {
true
}
}
4 changes: 4 additions & 0 deletions src/dialect/bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,8 @@ impl Dialect for BigQueryDialect {
fn supports_select_wildcard_except(&self) -> bool {
true
}

fn require_interval_qualifier(&self) -> bool {
true
}
}
4 changes: 4 additions & 0 deletions src/dialect/clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,8 @@ impl Dialect for ClickHouseDialect {
fn describe_requires_table_keyword(&self) -> bool {
true
}

fn require_interval_qualifier(&self) -> bool {
true
}
}
4 changes: 4 additions & 0 deletions src/dialect/databricks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,8 @@ impl Dialect for DatabricksDialect {
fn supports_select_wildcard_except(&self) -> bool {
true
}

fn require_interval_qualifier(&self) -> bool {
true
}
}
4 changes: 4 additions & 0 deletions src/dialect/hive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,8 @@ impl Dialect for HiveDialect {
fn supports_numeric_prefix(&self) -> bool {
true
}

fn require_interval_qualifier(&self) -> bool {
true
}
}
21 changes: 21 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,27 @@ pub trait Dialect: Debug + Any {
fn supports_create_index_with_clause(&self) -> bool {
false
}

/// Whether `INTERVAL` expressions require units (called "qualifiers" in the ANSI SQL spec) to be specified,
/// e.g. `INTERVAL 1 DAY` vs `INTERVAL 1`.
///
/// Expressions within intervals (e.g. `INTERVAL '1' + '1' DAY`) are only allowed when units are required.
///
/// See <https://github.com/sqlparser-rs/sqlparser-rs/pull/1398> for more information.
///
/// When `true`:
/// * `INTERVAL '1' DAY` is VALID
/// * `INTERVAL 1 + 1 DAY` is VALID
/// * `INTERVAL '1' + '1' DAY` is VALID
/// * `INTERVAL '1'` is INVALID
///
/// When `false`:
/// * `INTERVAL '1'` is VALID
/// * `INTERVAL '1' DAY` is VALID — unit is not required, but still allowed
/// * `INTERVAL 1 + 1 DAY` is INVALID
fn require_interval_qualifier(&self) -> bool {
false
}
}

/// This represents the operators for which precedence must be defined
Expand Down
4 changes: 4 additions & 0 deletions src/dialect/mysql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ impl Dialect for MySqlDialect {
None
}
}

fn require_interval_qualifier(&self) -> bool {
true
}
}

/// `LOCK TABLES`
Expand Down
145 changes: 65 additions & 80 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,6 @@ macro_rules! parser_err {
};
}

// Returns a successful result if the optional expression is some
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

drive-by change, this seemed really ugly, I couldn't resist fixing it.

macro_rules! return_ok_if_some {
($e:expr) => {{
if let Some(v) = $e {
return Ok(v);
}
}};
}

#[cfg(feature = "std")]
/// Implementation [`RecursionCounter`] if std is available
mod recursion {
Expand Down Expand Up @@ -896,35 +887,6 @@ impl<'a> Parser<'a> {
Ok(expr)
}

pub fn parse_interval_expr(&mut self) -> Result<Expr, ParserError> {
let precedence = self.dialect.prec_unknown();
let mut expr = self.parse_prefix()?;

loop {
let next_precedence = self.get_next_interval_precedence()?;

if precedence >= next_precedence {
break;
}

expr = self.parse_infix(expr, next_precedence)?;
}

Ok(expr)
}

/// Get the precedence of the next token, with AND, OR, and XOR.
pub fn get_next_interval_precedence(&self) -> Result<u8, ParserError> {
let token = self.peek_token();

match token.token {
Token::Word(w) if w.keyword == Keyword::AND => Ok(self.dialect.prec_unknown()),
Token::Word(w) if w.keyword == Keyword::OR => Ok(self.dialect.prec_unknown()),
Token::Word(w) if w.keyword == Keyword::XOR => Ok(self.dialect.prec_unknown()),
_ => self.get_next_precedence(),
}
}

pub fn parse_assert(&mut self) -> Result<Statement, ParserError> {
let condition = self.parse_expr()?;
let message = if self.parse_keyword(Keyword::AS) {
Expand Down Expand Up @@ -972,7 +934,7 @@ impl<'a> Parser<'a> {
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
// expression that should parse as the column name "date".
let loc = self.peek_token().location;
return_ok_if_some!(self.maybe_parse(|parser| {
let opt_expr = self.maybe_parse(|parser| {
match parser.parse_data_type()? {
DataType::Interval => parser.parse_interval(),
// PostgreSQL allows almost any identifier to be used as custom data type name,
Expand All @@ -988,7 +950,11 @@ impl<'a> Parser<'a> {
value: parser.parse_literal_string()?,
}),
}
}));
});

if let Some(expr) = opt_expr {
return Ok(expr);
}

let next_token = self.next_token();
let expr = match next_token.token {
Expand Down Expand Up @@ -2078,52 +2044,32 @@ impl<'a> Parser<'a> {
// don't currently try to parse it. (The sign can instead be included
// inside the value string.)

// The first token in an interval is a string literal which specifies
// the duration of the interval.
let value = self.parse_interval_expr()?;
// to match the different flavours of INTERVAL syntax, we only allow expressions
// if the dialect requires an interval qualifier,
// see https://github.com/sqlparser-rs/sqlparser-rs/pull/1398 for more details
let value = if self.dialect.require_interval_qualifier() {
// parse a whole expression so `INTERVAL 1 + 1 DAY` is valid
self.parse_expr()?
} else {
// parse a prefix expression so `INTERVAL 1 DAY` is valid, but `INTERVAL 1 + 1 DAY` is not
// this also means that `INTERVAL '5 days' > INTERVAL '1 day'` treated properly
self.parse_prefix()?
};

// Following the string literal is a qualifier which indicates the units
// of the duration specified in the string literal.
//
// Note that PostgreSQL allows omitting the qualifier, so we provide
// this more general implementation.
let leading_field = match self.peek_token().token {
Token::Word(kw)
if [
Keyword::YEAR,
Keyword::MONTH,
Keyword::WEEK,
Keyword::DAY,
Keyword::HOUR,
Keyword::MINUTE,
Keyword::SECOND,
Keyword::CENTURY,
Keyword::DECADE,
Keyword::DOW,
Keyword::DOY,
Keyword::EPOCH,
Keyword::ISODOW,
Keyword::ISOYEAR,
Keyword::JULIAN,
Keyword::MICROSECOND,
Keyword::MICROSECONDS,
Keyword::MILLENIUM,
Keyword::MILLENNIUM,
Keyword::MILLISECOND,
Keyword::MILLISECONDS,
Keyword::NANOSECOND,
Keyword::NANOSECONDS,
Keyword::QUARTER,
Keyword::TIMEZONE,
Keyword::TIMEZONE_HOUR,
Keyword::TIMEZONE_MINUTE,
]
.iter()
.any(|d| kw.keyword == *d) =>
{
Some(self.parse_date_time_field()?)
}
_ => None,
let leading_field = if self.next_token_is_temporal_unit() {
Some(self.parse_date_time_field()?)
} else if self.dialect.require_interval_qualifier() {
return parser_err!(
"INTERVAL requires a unit after the literal value",
self.peek_token().location
);
} else {
None
};

let (leading_precision, last_field, fsec_precision) =
Expand Down Expand Up @@ -2160,6 +2106,45 @@ impl<'a> Parser<'a> {
}))
}

/// Peek at the next token and determine if it is a temporal unit
/// like `second`.
pub fn next_token_is_temporal_unit(&mut self) -> bool {
if let Token::Word(word) = self.peek_token().token {
matches!(
word.keyword,
Keyword::YEAR
| Keyword::MONTH
| Keyword::WEEK
| Keyword::DAY
| Keyword::HOUR
| Keyword::MINUTE
| Keyword::SECOND
| Keyword::CENTURY
| Keyword::DECADE
| Keyword::DOW
| Keyword::DOY
| Keyword::EPOCH
| Keyword::ISODOW
| Keyword::ISOYEAR
| Keyword::JULIAN
| Keyword::MICROSECOND
| Keyword::MICROSECONDS
| Keyword::MILLENIUM
| Keyword::MILLENNIUM
| Keyword::MILLISECOND
| Keyword::MILLISECONDS
| Keyword::NANOSECOND
| Keyword::NANOSECONDS
| Keyword::QUARTER
| Keyword::TIMEZONE
| Keyword::TIMEZONE_HOUR
| Keyword::TIMEZONE_MINUTE
)
} else {
false
}
}

/// Bigquery specific: Parse a struct literal
/// Syntax
/// ```sql
Expand Down
21 changes: 8 additions & 13 deletions tests/sqlparser_bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
#[macro_use]
mod test_utils;

use sqlparser::ast;
use std::ops::Deref;

use sqlparser::ast::*;
Expand Down Expand Up @@ -830,16 +829,14 @@ fn parse_typed_struct_syntax_bigquery() {
expr_from_projection(&select.projection[3])
);

let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '1-2 3 4:5:6.789999'), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '2' HOUR), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you please

  1. leave the existing test as is to show what the effect of the changes are on this query? (I think it would error?)
  2. Add a new test for this new '2' HOUR variant

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the BigQuery syntax in https://cloud.google.com/bigquery/docs/reference/standard-sql/interval_functions I actually think the new test is more useful / correct -- all the intervals appear to have a unit after them, such as:

...
  UNNEST([INTERVAL '1-2 3 4:5:6.789999' YEAR TO SECOND,
          INTERVAL '0-13 370 48:61:61' YEAR TO SECOND]) AS i
...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yup, BigQuery is MySQL flavoured when it comes to intervals, so intervals without units are not permitted.

let select = bigquery().verified_only_select(sql);
assert_eq!(2, select.projection.len());
assert_eq!(
&Expr::Struct {
values: vec![Expr::Interval(ast::Interval {
value: Box::new(Expr::Value(Value::SingleQuotedString(
"1-2 3 4:5:6.789999".to_string()
))),
leading_field: None,
values: vec![Expr::Interval(Interval {
value: Box::new(Expr::Value(Value::SingleQuotedString("2".to_string()))),
leading_field: Some(DateTimeField::Hour),
leading_precision: None,
last_field: None,
fractional_seconds_precision: None
Expand Down Expand Up @@ -1141,16 +1138,14 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
expr_from_projection(&select.projection[3])
);

let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '1-2 3 4:5:6.789999'), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '1' MONTH), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
let select = bigquery_and_generic().verified_only_select(sql);
assert_eq!(2, select.projection.len());
assert_eq!(
&Expr::Struct {
values: vec![Expr::Interval(ast::Interval {
value: Box::new(Expr::Value(Value::SingleQuotedString(
"1-2 3 4:5:6.789999".to_string()
))),
leading_field: None,
values: vec![Expr::Interval(Interval {
value: Box::new(Expr::Value(Value::SingleQuotedString("1".to_string()))),
leading_field: Some(DateTimeField::Month),
leading_precision: None,
last_field: None,
fractional_seconds_precision: None
Expand Down
Loading
Loading