Skip to content

Fix INTERVAL parsing to support expressions and units via dialect #1398

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Sep 6, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/dialect/duckdb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,8 @@ impl Dialect for DuckDbDialect {
fn support_map_literal_syntax(&self) -> bool {
true
}

/// Interval units are optional for this dialect: overrides the trait default
/// of `true`, so a bare `INTERVAL '1'` or `INTERVAL '1 second'` is accepted.
fn require_interval_units(&self) -> bool {
false
}
}
17 changes: 17 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -499,6 +499,23 @@ pub trait Dialect: Debug + Any {
fn describe_requires_table_keyword(&self) -> bool {
false
}

/// Whether or not units are required with interval expressions.
///
/// The default implementation returns `true`; dialects that allow a bare
/// interval value (no trailing unit keyword) override this to return `false`.
///
/// When `true`, the parser keeps folding infix expressions into the interval
/// value until it reaches a unit keyword:
/// * `INTERVAL '1' DAY` is VALID
/// * `INTERVAL 1 + 1 DAY` is VALID
/// * `INTERVAL '1' + '1' DAY` is VALID
/// * `INTERVAL '1'` is INVALID
///
/// When `false`, only the first prefix expression is taken as the interval
/// value, optionally followed by a unit keyword:
/// * `INTERVAL '1' DAY` is VALID
/// * `INTERVAL '1'` is VALID
/// * `INTERVAL '1 second'` is VALID
/// * `INTERVAL 1 + 1 DAY` is INVALID
fn require_interval_units(&self) -> bool {
true
}
}

/// This represents the operators for which precedence must be defined
Expand Down
4 changes: 4 additions & 0 deletions src/dialect/postgresql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,10 @@ impl Dialect for PostgreSqlDialect {
Precedence::Or => OR_PREC,
}
}

/// Interval units are optional for this dialect: overrides the trait default
/// of `true`, so a bare `INTERVAL '1'` or `INTERVAL '1 second'` is accepted.
fn require_interval_units(&self) -> bool {
false
}
}

pub fn parse_comment(parser: &mut Parser) -> Result<Statement, ParserError> {
Expand Down
4 changes: 4 additions & 0 deletions src/dialect/redshift.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,4 +63,8 @@ impl Dialect for RedshiftSqlDialect {
fn supports_connect_by(&self) -> bool {
true
}

/// Interval units are optional for this dialect: overrides the trait default
/// of `true`, so a bare `INTERVAL '1'` or `INTERVAL '1 second'` is accepted.
fn require_interval_units(&self) -> bool {
false
}
}
4 changes: 4 additions & 0 deletions src/dialect/snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,10 @@ impl Dialect for SnowflakeDialect {
fn describe_requires_table_keyword(&self) -> bool {
true
}

/// Interval units are optional for this dialect: overrides the trait default
/// of `true`, so a bare `INTERVAL '1'` or `INTERVAL '1 second'` is accepted.
fn require_interval_units(&self) -> bool {
false
}
}

/// Parse snowflake create table statement.
Expand Down
159 changes: 79 additions & 80 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,6 @@ macro_rules! parser_err {
};
}

// Returns early from the enclosing function with `Ok(v)` when the optional expression is `Some(v)`.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

drive-by change, this seemed really ugly, I couldn't resist fixing it.

// Evaluates `$e` exactly once. When it yields `Some(found)`, the enclosing
// function returns `Ok(found)` immediately; on `None` execution simply
// continues with the statement after the macro call.
macro_rules! return_ok_if_some {
    ($e:expr) => {{
        match $e {
            Some(found) => return Ok(found),
            None => {}
        }
    }};
}

#[cfg(feature = "std")]
/// Implementation [`RecursionCounter`] if std is available
mod recursion {
Expand Down Expand Up @@ -896,35 +887,6 @@ impl<'a> Parser<'a> {
Ok(expr)
}

/// Parses the value expression of an interval.
///
/// Starts from a prefix expression and keeps folding infix operators into it
/// for as long as the next interval precedence is strictly greater than the
/// dialect's "unknown" precedence.
///
/// # Errors
///
/// Propagates any error from `parse_prefix`, `get_next_interval_precedence`
/// or `parse_infix`.
pub fn parse_interval_expr(&mut self) -> Result<Expr, ParserError> {
    let floor = self.dialect.prec_unknown();
    let mut lhs = self.parse_prefix()?;

    loop {
        let upcoming = self.get_next_interval_precedence()?;

        // Nothing that binds tighter than the floor follows: the value is complete.
        if upcoming <= floor {
            return Ok(lhs);
        }

        lhs = self.parse_infix(lhs, upcoming)?;
    }
}

/// Get the precedence of the next token while parsing an interval value.
///
/// `AND`, `OR` and `XOR` report the dialect's "unknown" precedence; every
/// other token defers to `get_next_precedence`.
pub fn get_next_interval_precedence(&self) -> Result<u8, ParserError> {
    match self.peek_token().token {
        Token::Word(w) if matches!(w.keyword, Keyword::AND | Keyword::OR | Keyword::XOR) => {
            Ok(self.dialect.prec_unknown())
        }
        _ => self.get_next_precedence(),
    }
}

pub fn parse_assert(&mut self) -> Result<Statement, ParserError> {
let condition = self.parse_expr()?;
let message = if self.parse_keyword(Keyword::AS) {
Expand Down Expand Up @@ -972,7 +934,7 @@ impl<'a> Parser<'a> {
// name is not followed by a string literal, but in fact in PostgreSQL it is a valid
// expression that should parse as the column name "date".
let loc = self.peek_token().location;
return_ok_if_some!(self.maybe_parse(|parser| {
let opt_expr = self.maybe_parse(|parser| {
match parser.parse_data_type()? {
DataType::Interval => parser.parse_interval(),
// PostgreSQL allows almost any identifier to be used as custom data type name,
Expand All @@ -988,7 +950,11 @@ impl<'a> Parser<'a> {
value: parser.parse_literal_string()?,
}),
}
}));
});

if let Some(expr) = opt_expr {
return Ok(expr);
}

let next_token = self.next_token();
let expr = match next_token.token {
Expand Down Expand Up @@ -2079,52 +2045,21 @@ impl<'a> Parser<'a> {
// don't currently try to parse it. (The sign can instead be included
// inside the value string.)

// The first token in an interval is a string literal which specifies
// the duration of the interval.
let value = self.parse_interval_expr()?;
let (value, has_units) = if self.dialect.require_interval_units() {
self.parse_interval_expr_units_required()?
} else {
self.parse_interval_expr_units_not_require()?
};

// Following the string literal is a qualifier which indicates the units
// of the duration specified in the string literal.
//
// Note that PostgreSQL allows omitting the qualifier, so we provide
// this more general implementation.
let leading_field = match self.peek_token().token {
Token::Word(kw)
if [
Keyword::YEAR,
Keyword::MONTH,
Keyword::WEEK,
Keyword::DAY,
Keyword::HOUR,
Keyword::MINUTE,
Keyword::SECOND,
Keyword::CENTURY,
Keyword::DECADE,
Keyword::DOW,
Keyword::DOY,
Keyword::EPOCH,
Keyword::ISODOW,
Keyword::ISOYEAR,
Keyword::JULIAN,
Keyword::MICROSECOND,
Keyword::MICROSECONDS,
Keyword::MILLENIUM,
Keyword::MILLENNIUM,
Keyword::MILLISECOND,
Keyword::MILLISECONDS,
Keyword::NANOSECOND,
Keyword::NANOSECONDS,
Keyword::QUARTER,
Keyword::TIMEZONE,
Keyword::TIMEZONE_HOUR,
Keyword::TIMEZONE_MINUTE,
]
.iter()
.any(|d| kw.keyword == *d) =>
{
Some(self.parse_date_time_field()?)
}
_ => None,
let leading_field = if has_units {
Some(self.parse_date_time_field()?)
} else {
None
};

let (leading_precision, last_field, fsec_precision) =
Expand Down Expand Up @@ -2161,6 +2096,70 @@ impl<'a> Parser<'a> {
}))
}

/// Parses an interval value for dialects where `require_interval_units` is `true`.
///
/// After the initial prefix expression, infix expressions are folded in one at
/// a time until the upcoming token is a unit keyword.
///
/// # Returns
///
/// A tuple of (interval expression, whether a unit is found). On success the
/// flag is always `true`, because seeing a unit is the only way the loop ends
/// without an error.
///
/// # Errors
///
/// Propagates any error from `parse_prefix` or `parse_infix`.
pub fn parse_interval_expr_units_required(&mut self) -> Result<(Expr, bool), ParserError> {
    let mut value = self.parse_prefix()?;

    while !self.next_token_is_unit() {
        let precedence = self.dialect.prec_unknown();
        value = self.parse_infix(value, precedence)?;
    }

    Ok((value, true))
}

/// Parses an interval value for dialects where `require_interval_units` is `false`.
///
/// Only the first prefix expression is taken as the value; afterwards the
/// next token is inspected to see whether a unit follows.
///
/// # Returns
///
/// A tuple of (interval expression, whether a unit is found)
///
/// # Errors
///
/// Propagates any error from `parse_prefix`; in that case the next token is
/// not inspected.
pub fn parse_interval_expr_units_not_require(&mut self) -> Result<(Expr, bool), ParserError> {
    let value = self.parse_prefix()?;
    let has_units = self.next_token_is_unit();
    Ok((value, has_units))
}

/// Reports whether the next token is a keyword naming an interval unit
/// (`YEAR`, `MONTH`, `DAY`, `SECOND`, ...).
///
/// The token is inspected through `peek_token`, so this only needs shared
/// access to the parser; callers holding `&mut self` can still call it.
///
/// # Returns
///
/// `true` when the next token is a word whose keyword is one of the unit
/// keywords listed below, `false` for any other token.
pub fn next_token_is_unit(&self) -> bool {
    match self.peek_token().token {
        Token::Word(word) => matches!(
            word.keyword,
            Keyword::YEAR
                | Keyword::MONTH
                | Keyword::WEEK
                | Keyword::DAY
                | Keyword::HOUR
                | Keyword::MINUTE
                | Keyword::SECOND
                | Keyword::CENTURY
                | Keyword::DECADE
                | Keyword::DOW
                | Keyword::DOY
                | Keyword::EPOCH
                | Keyword::ISODOW
                | Keyword::ISOYEAR
                | Keyword::JULIAN
                | Keyword::MICROSECOND
                | Keyword::MICROSECONDS
                | Keyword::MILLENIUM
                | Keyword::MILLENNIUM
                | Keyword::MILLISECOND
                | Keyword::MILLISECONDS
                | Keyword::NANOSECOND
                | Keyword::NANOSECONDS
                | Keyword::QUARTER
                | Keyword::TIMEZONE
                | Keyword::TIMEZONE_HOUR
                | Keyword::TIMEZONE_MINUTE
        ),
        _ => false,
    }
}

/// Bigquery specific: Parse a struct literal
/// Syntax
/// ```sql
Expand Down
18 changes: 7 additions & 11 deletions tests/sqlparser_bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -830,16 +830,14 @@ fn parse_typed_struct_syntax_bigquery() {
expr_from_projection(&select.projection[3])
);

let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '1-2 3 4:5:6.789999'), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '1' DAY), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
let select = bigquery().verified_only_select(sql);
assert_eq!(2, select.projection.len());
assert_eq!(
&Expr::Struct {
values: vec![Expr::Interval(ast::Interval {
value: Box::new(Expr::Value(Value::SingleQuotedString(
"1-2 3 4:5:6.789999".to_string()
))),
leading_field: None,
value: Box::new(Expr::Value(Value::SingleQuotedString("1".to_string()))),
leading_field: Some(DateTimeField::Day),
leading_precision: None,
last_field: None,
fractional_seconds_precision: None
Expand Down Expand Up @@ -1141,16 +1139,14 @@ fn parse_typed_struct_syntax_bigquery_and_generic() {
expr_from_projection(&select.projection[3])
);

let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '1-2 3 4:5:6.789999'), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
let sql = r#"SELECT STRUCT<INTERVAL>(INTERVAL '2' MONTH), STRUCT<JSON>(JSON '{"class" : {"students" : [{"name" : "Jane"}]}}')"#;
let select = bigquery_and_generic().verified_only_select(sql);
assert_eq!(2, select.projection.len());
assert_eq!(
&Expr::Struct {
values: vec![Expr::Interval(ast::Interval {
value: Box::new(Expr::Value(Value::SingleQuotedString(
"1-2 3 4:5:6.789999".to_string()
))),
leading_field: None,
values: vec![Expr::Interval(Interval {
value: Box::new(Expr::Value(Value::SingleQuotedString("2".to_string()))),
leading_field: Some(DateTimeField::Month),
leading_precision: None,
last_field: None,
fractional_seconds_precision: None
Expand Down
Loading
Loading