Skip to content

Commit afa5f08

Browse files
jmhain and alamb authored
Support for Postgres array slice syntax (apache#1290)
Co-authored-by: Andrew Lamb <[email protected]>
1 parent 80c03f5 commit afa5f08

File tree

5 files changed

+355
-60
lines changed

5 files changed

+355
-60
lines changed

src/ast/mod.rs

+72-11
Original file line numberDiff line numberDiff line change
@@ -679,7 +679,7 @@ pub enum Expr {
679679
},
680680
/// Access a map-like object by field (e.g. `column['field']` or `column[4]`).
681681
/// Note that depending on the dialect, struct-like accesses may be
682-
/// parsed as [`ArrayIndex`](Self::ArrayIndex) or [`MapAccess`](Self::MapAccess)
682+
/// parsed as [`Subscript`](Self::Subscript) or [`MapAccess`](Self::MapAccess)
683683
/// <https://clickhouse.com/docs/en/sql-reference/data-types/map/>
684684
MapAccess {
685685
column: Box<Expr>,
@@ -746,10 +746,10 @@ pub enum Expr {
746746
/// ```
747747
/// [1]: https://duckdb.org/docs/sql/data_types/struct#creating-structs
748748
Dictionary(Vec<DictionaryField>),
749-
/// An array index expression e.g. `(ARRAY[1, 2])[1]` or `(current_schemas(FALSE))[1]`
750-
ArrayIndex {
751-
obj: Box<Expr>,
752-
indexes: Vec<Expr>,
749+
/// An access of nested data using subscript syntax, for example `array[2]`.
750+
Subscript {
751+
expr: Box<Expr>,
752+
subscript: Box<Subscript>,
753753
},
754754
/// An array expression e.g. `ARRAY[1, 2]`
755755
Array(Array),
@@ -805,6 +805,68 @@ pub enum Expr {
805805
Lambda(LambdaFunction),
806806
}
807807

808+
/// The contents inside the `[` and `]` in a subscript expression.
809+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
810+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
811+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
812+
pub enum Subscript {
813+
/// Accesses the element of the array at the given index.
814+
Index { index: Expr },
815+
816+
/// Accesses a slice of an array on PostgreSQL, e.g.
817+
///
818+
/// ```plaintext
819+
/// => select (array[1,2,3,4,5,6])[2:5];
820+
/// -----------
821+
/// {2,3,4,5}
822+
/// ```
823+
///
824+
/// The lower and/or upper bound can be omitted to slice from the start or
825+
/// end of the array respectively.
826+
///
827+
/// See <https://www.postgresql.org/docs/current/arrays.html#ARRAYS-ACCESSING>.
828+
///
829+
/// Also supports an optional "stride" as the last element (this is not
830+
/// supported by PostgreSQL), e.g.
831+
///
832+
/// ```plaintext
833+
/// => select (array[1,2,3,4,5,6])[1:6:2];
834+
/// -----------
835+
/// {1,3,5}
836+
/// ```
837+
Slice {
838+
lower_bound: Option<Expr>,
839+
upper_bound: Option<Expr>,
840+
stride: Option<Expr>,
841+
},
842+
}
843+
844+
impl fmt::Display for Subscript {
845+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
846+
match self {
847+
Subscript::Index { index } => write!(f, "{index}"),
848+
Subscript::Slice {
849+
lower_bound,
850+
upper_bound,
851+
stride,
852+
} => {
853+
if let Some(lower) = lower_bound {
854+
write!(f, "{lower}")?;
855+
}
856+
write!(f, ":")?;
857+
if let Some(upper) = upper_bound {
858+
write!(f, "{upper}")?;
859+
}
860+
if let Some(stride) = stride {
861+
write!(f, ":")?;
862+
write!(f, "{stride}")?;
863+
}
864+
Ok(())
865+
}
866+
}
867+
}
868+
}
869+
808870
/// A lambda function.
809871
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
810872
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -1251,12 +1313,11 @@ impl fmt::Display for Expr {
12511313
Expr::Dictionary(fields) => {
12521314
write!(f, "{{{}}}", display_comma_separated(fields))
12531315
}
1254-
Expr::ArrayIndex { obj, indexes } => {
1255-
write!(f, "{obj}")?;
1256-
for i in indexes {
1257-
write!(f, "[{i}]")?;
1258-
}
1259-
Ok(())
1316+
Expr::Subscript {
1317+
expr,
1318+
subscript: key,
1319+
} => {
1320+
write!(f, "{expr}[{key}]")
12601321
}
12611322
Expr::Array(set) => {
12621323
write!(f, "{set}")

src/parser/mod.rs

+81-13
Original file line numberDiff line numberDiff line change
@@ -2544,8 +2544,7 @@ impl<'a> Parser<'a> {
25442544
})
25452545
} else if Token::LBracket == tok {
25462546
if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) {
2547-
// parse index
2548-
self.parse_array_index(expr)
2547+
self.parse_subscript(expr)
25492548
} else if dialect_of!(self is SnowflakeDialect) {
25502549
self.prev_token();
25512550
self.parse_json_access(expr)
@@ -2573,18 +2572,87 @@ impl<'a> Parser<'a> {
25732572
}
25742573
}
25752574

2576-
pub fn parse_array_index(&mut self, expr: Expr) -> Result<Expr, ParserError> {
2577-
let index = self.parse_expr()?;
2578-
self.expect_token(&Token::RBracket)?;
2579-
let mut indexes: Vec<Expr> = vec![index];
2580-
while self.consume_token(&Token::LBracket) {
2581-
let index = self.parse_expr()?;
2575+
/// Parses an array subscript like
2576+
/// * `[:]`
2577+
/// * `[l]`
2578+
/// * `[l:]`
2579+
/// * `[:u]`
2580+
/// * `[l:u]`
2581+
/// * `[l:u:s]`
2582+
///
2583+
/// Parser is right after `[`
2584+
fn parse_subscript_inner(&mut self) -> Result<Subscript, ParserError> {
2585+
// we are at either `<lower>(rest)]` or `:(rest)]`
2586+
let lower_bound = if self.consume_token(&Token::Colon) {
2587+
None
2588+
} else {
2589+
Some(self.parse_expr()?)
2590+
};
2591+
2592+
// check for end
2593+
if self.consume_token(&Token::RBracket) {
2594+
if let Some(lower_bound) = lower_bound {
2595+
return Ok(Subscript::Index { index: lower_bound });
2596+
};
2597+
return Ok(Subscript::Slice {
2598+
lower_bound,
2599+
upper_bound: None,
2600+
stride: None,
2601+
});
2602+
}
2603+
2604+
// consume the `:` after the lower bound (when the lower bound was omitted, the `:` was already consumed above)
2605+
if lower_bound.is_some() {
2606+
self.expect_token(&Token::Colon)?;
2607+
}
2608+
2609+
// we are now at either `]` or `<upper>(rest)]`
2610+
let upper_bound = if self.consume_token(&Token::RBracket) {
2611+
return Ok(Subscript::Slice {
2612+
lower_bound,
2613+
upper_bound: None,
2614+
stride: None,
2615+
});
2616+
} else {
2617+
Some(self.parse_expr()?)
2618+
};
2619+
2620+
// check for end
2621+
if self.consume_token(&Token::RBracket) {
2622+
return Ok(Subscript::Slice {
2623+
lower_bound,
2624+
upper_bound,
2625+
stride: None,
2626+
});
2627+
}
2628+
2629+
// we are now at `:]` or `:stride]`
2630+
self.expect_token(&Token::Colon)?;
2631+
let stride = if self.consume_token(&Token::RBracket) {
2632+
None
2633+
} else {
2634+
Some(self.parse_expr()?)
2635+
};
2636+
2637+
if stride.is_some() {
25822638
self.expect_token(&Token::RBracket)?;
2583-
indexes.push(index);
25842639
}
2585-
Ok(Expr::ArrayIndex {
2586-
obj: Box::new(expr),
2587-
indexes,
2640+
2641+
Ok(Subscript::Slice {
2642+
lower_bound,
2643+
upper_bound,
2644+
stride,
2645+
})
2646+
}
2647+
2648+
/// Parses an array subscript such as `[1]` or `[1:3]` into an [`Expr::Subscript`] wrapping `expr`
2649+
///
2650+
/// Parser is right after `[`
2651+
pub fn parse_subscript(&mut self, expr: Expr) -> Result<Expr, ParserError> {
2652+
let subscript = self.parse_subscript_inner()?;
2653+
Ok(Expr::Subscript {
2654+
expr: Box::new(expr),
2655+
subscript: Box::new(subscript),
25882656
})
25892657
}
25902658

@@ -2838,7 +2906,7 @@ impl<'a> Parser<'a> {
28382906
Ok(Self::MUL_DIV_MOD_OP_PREC)
28392907
}
28402908
Token::DoubleColon => Ok(50),
2841-
Token::Colon => Ok(50),
2909+
Token::Colon if dialect_of!(self is SnowflakeDialect) => Ok(50),
28422910
Token::ExclamationMark => Ok(50),
28432911
Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50),
28442912
Token::Arrow

tests/sqlparser_duckdb.rs

+5-3
Original file line numberDiff line numberDiff line change
@@ -528,16 +528,18 @@ fn test_array_index() {
528528
_ => panic!("Expected an expression with alias"),
529529
};
530530
assert_eq!(
531-
&Expr::ArrayIndex {
532-
obj: Box::new(Expr::Array(Array {
531+
&Expr::Subscript {
532+
expr: Box::new(Expr::Array(Array {
533533
elem: vec![
534534
Expr::Value(Value::SingleQuotedString("a".to_owned())),
535535
Expr::Value(Value::SingleQuotedString("b".to_owned())),
536536
Expr::Value(Value::SingleQuotedString("c".to_owned()))
537537
],
538538
named: false
539539
})),
540-
indexes: vec![Expr::Value(number("3"))]
540+
subscript: Box::new(Subscript::Index {
541+
index: Expr::Value(number("3"))
542+
})
541543
},
542544
expr
543545
);

0 commit comments

Comments
 (0)