diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 042b1030e..2a687800a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -51,7 +51,8 @@ pub use self::query::{ Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With, }; pub use self::value::{ - escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value, + escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, + TrimWhereField, Value, }; use crate::ast::helpers::stmt_data_loading::{ @@ -270,66 +271,6 @@ impl fmt::Display for Interval { } } -/// JsonOperator -#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum JsonOperator { - /// -> keeps the value as json - Arrow, - /// ->> keeps the value as text or int. - LongArrow, - /// #> Extracts JSON sub-object at the specified path - HashArrow, - /// #>> Extracts JSON sub-object at the specified path as text - HashLongArrow, - /// : Colon is used by Snowflake (Which is similar to LongArrow) - Colon, - /// jsonb @> jsonb -> boolean: Test whether left json contains the right json - AtArrow, - /// jsonb <@ jsonb -> boolean: Test whether right json contains the left json - ArrowAt, - /// jsonb #- text[] -> jsonb: Deletes the field or array element at the specified - /// path, where path elements can be either field keys or array indexes. - HashMinus, - /// jsonb @? jsonpath -> boolean: Does JSON path return any item for the specified - /// JSON value? - AtQuestion, - /// jsonb @@ jsonpath → boolean: Returns the result of a JSON path predicate check - /// for the specified JSON value. Only the first item of the result is taken into - /// account. If the result is not Boolean, then NULL is returned. 
- AtAt, -} - -impl fmt::Display for JsonOperator { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - JsonOperator::Arrow => { - write!(f, "->") - } - JsonOperator::LongArrow => { - write!(f, "->>") - } - JsonOperator::HashArrow => { - write!(f, "#>") - } - JsonOperator::HashLongArrow => { - write!(f, "#>>") - } - JsonOperator::Colon => { - write!(f, ":") - } - JsonOperator::AtArrow => { - write!(f, "@>") - } - JsonOperator::ArrowAt => write!(f, "<@"), - JsonOperator::HashMinus => write!(f, "#-"), - JsonOperator::AtQuestion => write!(f, "@?"), - JsonOperator::AtAt => write!(f, "@@"), - } - } -} - /// A field definition within a struct. /// /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type @@ -412,6 +353,59 @@ impl fmt::Display for MapAccessKey { } } +/// An element of a JSON path. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum JsonPathElem { + /// Accesses an object field using dot notation, e.g. `obj:foo.bar.baz`. + /// + /// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation>. + Dot { key: String, quoted: bool }, + /// Accesses an object field or array element using bracket notation, + /// e.g. `obj['foo']`. + /// + /// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#bracket-notation>. + Bracket { key: Expr }, +} + +/// A JSON path. +/// +/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured>. +/// See <https://docs.databricks.com/en/sql/language-manual/sql-ref-json-path-expression.html>.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct JsonPath { + pub path: Vec<JsonPathElem>, +} + +impl fmt::Display for JsonPath { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for (i, elem) in self.path.iter().enumerate() { + match elem { + JsonPathElem::Dot { key, quoted } => { + if i == 0 { + write!(f, ":")?; + } else { + write!(f, ".")?; + } + + if *quoted { + write!(f, "\"{}\"", escape_double_quote_string(key))?; + } else { + write!(f, "{key}")?; + } + } + JsonPathElem::Bracket { key } => { + write!(f, "[{key}]")?; + } + } + } + Ok(()) + } +} + /// The syntax used for in a cast expression. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -449,11 +443,16 @@ pub enum Expr { Identifier(Ident), /// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col` CompoundIdentifier(Vec<Ident>), - /// JSON access (postgres) eg: data->'tags' + /// Access data nested in a value containing semi-structured data, such as + /// the `VARIANT` type on Snowflake. For example `src:customer[0].name`. + /// + /// See <https://docs.snowflake.com/en/user-guide/querying-semistructured>. + /// See <https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html>. JsonAccess { - left: Box<Expr>, - operator: JsonOperator, - right: Box<Expr>, + /// The value being queried. + value: Box<Expr>, + /// The path to the data to extract.
+ path: JsonPath, }, /// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n CompositeAccess { @@ -1224,16 +1223,8 @@ impl fmt::Display for Expr { Expr::Array(set) => { write!(f, "{set}") } - Expr::JsonAccess { - left, - operator, - right, - } => { - if operator == &JsonOperator::Colon { - write!(f, "{left}{operator}{right}") - } else { - write!(f, "{left} {operator} {right}") - } + Expr::JsonAccess { value, path } => { + write!(f, "{value}{path}") } Expr::CompositeAccess { expr, key } => { write!(f, "{expr}.{key}") diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 8539c461b..c3134b4a3 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -141,6 +141,79 @@ pub enum BinaryOperator { PGNotILikeMatch, /// String "starts with", eg: `a ^@ b` (PostgreSQL-specific) PGStartsWith, + /// The `->` operator. + /// + /// On PostgreSQL, this operator extracts a JSON object field or array + /// element, for example `'{"a":"b"}'::json -> 'a'` or `'[1, 2, 3]'::json + /// -> 2`. + /// + /// See <https://www.postgresql.org/docs/current/functions-json.html>. + Arrow, + /// The `->>` operator. + /// + /// On PostgreSQL, this operator extracts a JSON object field or JSON + /// array element and converts it to text, for example `'{"a":"b"}'::json + /// ->> 'a'` or `'[1, 2, 3]'::json ->> 2`. + /// + /// See <https://www.postgresql.org/docs/current/functions-json.html>. + LongArrow, + /// The `#>` operator. + /// + /// On PostgreSQL, this operator extracts a JSON sub-object at the specified + /// path, for example: + /// + /// ```notrust + /// '{"a": {"b": ["foo","bar"]}}'::json #> '{a,b,1}' + /// ``` + /// + /// See <https://www.postgresql.org/docs/current/functions-json.html>. + HashArrow, + /// The `#>>` operator. + /// + /// A PostgreSQL-specific operator that extracts JSON sub-object at the + /// specified path as text, for example: + /// + /// ```notrust + /// '{"a": {"b": ["foo","bar"]}}'::json #>> '{a,b,1}' + /// ``` + /// + /// See <https://www.postgresql.org/docs/current/functions-json.html>. + HashLongArrow, + /// The `@@` operator. + /// + /// On PostgreSQL, this is used for JSON and text searches. + /// + /// See <https://www.postgresql.org/docs/current/functions-json.html>. + /// See <https://www.postgresql.org/docs/current/functions-textsearch.html>. + AtAt, + /// The `@>` operator.
+ /// + /// On PostgreSQL, this is used for JSON and text searches. + /// + /// See <https://www.postgresql.org/docs/current/functions-json.html>. + /// See <https://www.postgresql.org/docs/current/functions-textsearch.html>. + AtArrow, + /// The `<@` operator. + /// + /// On PostgreSQL, this is used for JSON and text searches. + /// + /// See <https://www.postgresql.org/docs/current/functions-json.html>. + /// See <https://www.postgresql.org/docs/current/functions-textsearch.html>. + ArrowAt, + /// The `#-` operator. + /// + /// On PostgreSQL, this operator is used to delete a field or array element + /// at a specified path. + /// + /// See <https://www.postgresql.org/docs/current/functions-json.html>. + HashMinus, + /// The `@?` operator. + /// + /// On PostgreSQL, this operator is used to check whether the given JSON path + /// returns an item for the JSON value. + /// + /// See <https://www.postgresql.org/docs/current/functions-json.html>. + AtQuestion, /// PostgreSQL-specific custom operator. /// /// See [CREATE OPERATOR](https://www.postgresql.org/docs/current/sql-createoperator.html) @@ -187,6 +260,15 @@ impl fmt::Display for BinaryOperator { BinaryOperator::PGNotLikeMatch => f.write_str("!~~"), BinaryOperator::PGNotILikeMatch => f.write_str("!~~*"), BinaryOperator::PGStartsWith => f.write_str("^@"), + BinaryOperator::Arrow => f.write_str("->"), + BinaryOperator::LongArrow => f.write_str("->>"), + BinaryOperator::HashArrow => f.write_str("#>"), + BinaryOperator::HashLongArrow => f.write_str("#>>"), + BinaryOperator::AtAt => f.write_str("@@"), + BinaryOperator::AtArrow => f.write_str("@>"), + BinaryOperator::ArrowAt => f.write_str("<@"), + BinaryOperator::HashMinus => f.write_str("#-"), + BinaryOperator::AtQuestion => f.write_str("@?"), BinaryOperator::PGCustomBinaryOperator(idents) => { write!(f, "OPERATOR({})", display_separated(idents, ".")) } diff --git a/src/ast/value.rs b/src/ast/value.rs index 84fdf00ae..cbdb8bed2 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -65,8 +65,6 @@ pub enum Value { Null, /// `?` or `$` Prepared statement arg placeholder Placeholder(String), - /// Add support of snowflake field:key - key should be a value - UnQuotedString(String), } impl fmt::Display for Value { @@ -85,7 +83,6 @@ impl fmt::Display for Value { Value::RawStringLiteral(v) => write!(f, "R'{v}'"), Value::Null => write!(f, "NULL"),
Value::Placeholder(v) => write!(f, "{v}"), - Value::UnQuotedString(v) => write!(f, "{v}"), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fd4f2a0fe..7110402a4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2346,6 +2346,16 @@ impl<'a> Parser<'a> { Token::DoubleTildeAsterisk => Some(BinaryOperator::PGILikeMatch), Token::ExclamationMarkDoubleTilde => Some(BinaryOperator::PGNotLikeMatch), Token::ExclamationMarkDoubleTildeAsterisk => Some(BinaryOperator::PGNotILikeMatch), + Token::Arrow => Some(BinaryOperator::Arrow), + Token::LongArrow => Some(BinaryOperator::LongArrow), + Token::HashArrow => Some(BinaryOperator::HashArrow), + Token::HashLongArrow => Some(BinaryOperator::HashLongArrow), + Token::AtArrow => Some(BinaryOperator::AtArrow), + Token::ArrowAt => Some(BinaryOperator::ArrowAt), + Token::HashMinus => Some(BinaryOperator::HashMinus), + Token::AtQuestion => Some(BinaryOperator::AtQuestion), + Token::AtAt => Some(BinaryOperator::AtAt), + Token::Word(w) => match w.keyword { Keyword::AND => Some(BinaryOperator::And), Keyword::OR => Some(BinaryOperator::Or), @@ -2539,42 +2549,16 @@ impl<'a> Parser<'a> { } else if Token::LBracket == tok { if dialect_of!(self is PostgreSqlDialect | GenericDialect) { // parse index - return self.parse_array_index(expr); + self.parse_array_index(expr) + } else if dialect_of!(self is SnowflakeDialect) { + self.prev_token(); + self.parse_json_access(expr) + } else { + self.parse_map_access(expr) } - self.parse_map_access(expr) - } else if Token::Colon == tok { - Ok(Expr::JsonAccess { - left: Box::new(expr), - operator: JsonOperator::Colon, - right: Box::new(Expr::Value(self.parse_value()?)), - }) - } else if Token::Arrow == tok - || Token::LongArrow == tok - || Token::HashArrow == tok - || Token::HashLongArrow == tok - || Token::AtArrow == tok - || Token::ArrowAt == tok - || Token::HashMinus == tok - || Token::AtQuestion == tok - || Token::AtAt == tok - { - let operator = match tok.token { - Token::Arrow => 
JsonOperator::Arrow, - Token::LongArrow => JsonOperator::LongArrow, - Token::HashArrow => JsonOperator::HashArrow, - Token::HashLongArrow => JsonOperator::HashLongArrow, - Token::AtArrow => JsonOperator::AtArrow, - Token::ArrowAt => JsonOperator::ArrowAt, - Token::HashMinus => JsonOperator::HashMinus, - Token::AtQuestion => JsonOperator::AtQuestion, - Token::AtAt => JsonOperator::AtAt, - _ => unreachable!(), - }; - Ok(Expr::JsonAccess { - left: Box::new(expr), - operator, - right: Box::new(self.parse_expr()?), - }) + } else if dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == tok { + self.prev_token(); + self.parse_json_access(expr) } else { // Can only happen if `get_next_precedence` got out of sync with this function parser_err!( @@ -2608,6 +2592,60 @@ impl<'a> Parser<'a> { }) } + fn parse_json_path_object_key(&mut self) -> Result<JsonPathElem, ParserError> { + let token = self.next_token(); + match token.token { + Token::Word(Word { + value, + // path segments in Snowflake dot notation can be unquoted or double quoted + quote_style: quote_style @ (Some('"') | None), + // some experimentation suggests that Snowflake permits + // any keyword here unquoted. + keyword: _, + }) => Ok(JsonPathElem::Dot { + key: value, + quoted: quote_style.is_some(), + }), + + // This token should never be generated on Snowflake or generic + // dialects, but we handle it just in case this is used on future + // dialects.
+ Token::DoubleQuotedString(key) => Ok(JsonPathElem::Dot { key, quoted: true }), + + _ => self.expected("variant object key name", token), + } + } + + fn parse_json_access(&mut self, expr: Expr) -> Result<Expr, ParserError> { + let mut path = Vec::new(); + loop { + match self.next_token().token { + Token::Colon if path.is_empty() => { + path.push(self.parse_json_path_object_key()?); + } + Token::Period if !path.is_empty() => { + path.push(self.parse_json_path_object_key()?); + } + Token::LBracket => { + let key = self.parse_expr()?; + self.expect_token(&Token::RBracket)?; + + path.push(JsonPathElem::Bracket { key }); + } + _ => { + self.prev_token(); + break; + } + }; + } + + debug_assert!(!path.is_empty()); + Ok(Expr::JsonAccess { + value: Box::new(expr), + path: JsonPath { path }, + }) + } + pub fn parse_map_access(&mut self, expr: Expr) -> Result<Expr, ParserError> { let key = self.parse_expr()?; self.expect_token(&Token::RBracket)?; @@ -2711,6 +2749,7 @@ impl<'a> Parser<'a> { } // use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference + // higher number = higher precedence const MUL_DIV_MOD_OP_PREC: u8 = 40; const PLUS_MINUS_PREC: u8 = 30; const XOR_PREC: u8 = 24; @@ -2718,6 +2757,7 @@ impl<'a> Parser<'a> { const BETWEEN_PREC: u8 = 20; const LIKE_PREC: u8 = 19; const IS_PREC: u8 = 17; + const PG_OTHER_PREC: u8 = 16; const UNARY_NOT_PREC: u8 = 15; const AND_PREC: u8 = 10; const OR_PREC: u8 = 5; @@ -2802,18 +2842,16 @@ impl<'a> Parser<'a> { Token::DoubleColon => Ok(50), Token::Colon => Ok(50), Token::ExclamationMark => Ok(50), - Token::LBracket + Token::LBracket | Token::Overlap | Token::CaretAt => Ok(50), + Token::Arrow | Token::LongArrow - | Token::Arrow - | Token::Overlap - | Token::CaretAt | Token::HashArrow | Token::HashLongArrow | Token::AtArrow | Token::ArrowAt | Token::HashMinus | Token::AtQuestion - | Token::AtAt => Ok(50), + | Token::AtAt => Ok(Self::PG_OTHER_PREC), _ => Ok(0), } } @@ -6236,17 +6274,6 @@ impl<'a> Parser<'a> { }, )?, }, - // Case when Snowflake
Semi-structured data like key:value - Keyword::NoKeyword - | Keyword::LOCATION - | Keyword::TYPE - | Keyword::DATE - | Keyword::START - | Keyword::END - if dialect_of!(self is SnowflakeDialect | GenericDialect) => - { - Ok(Value::UnQuotedString(w.value)) - } _ => self.expected( "a concrete value", TokenWithLocation { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f465b2200..1bf1e8399 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1375,25 +1375,25 @@ fn pg_and_generic() -> TestedDialects { #[test] fn parse_json_ops_without_colon() { - use self::JsonOperator; - let binary_ops = &[ - ("->", JsonOperator::Arrow, all_dialects()), - ("->>", JsonOperator::LongArrow, all_dialects()), - ("#>", JsonOperator::HashArrow, pg_and_generic()), - ("#>>", JsonOperator::HashLongArrow, pg_and_generic()), - ("@>", JsonOperator::AtArrow, all_dialects()), - ("<@", JsonOperator::ArrowAt, all_dialects()), - ("#-", JsonOperator::HashMinus, pg_and_generic()), - ("@?", JsonOperator::AtQuestion, all_dialects()), - ("@@", JsonOperator::AtAt, all_dialects()), + use self::BinaryOperator::*; + let binary_ops = [ + ("->", Arrow, all_dialects()), + ("->>", LongArrow, all_dialects()), + ("#>", HashArrow, pg_and_generic()), + ("#>>", HashLongArrow, pg_and_generic()), + ("@>", AtArrow, all_dialects()), + ("<@", ArrowAt, all_dialects()), + ("#-", HashMinus, pg_and_generic()), + ("@?", AtQuestion, all_dialects()), + ("@@", AtAt, all_dialects()), ]; for (str_op, op, dialects) in binary_ops { let select = dialects.verified_only_select(&format!("SELECT a {} b", &str_op)); assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { + SelectItem::UnnamedExpr(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("a"))), - operator: *op, + op, right: Box::new(Expr::Identifier(Ident::new("b"))), }), select.projection[0] diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index bd0f67f6e..94ab0d420 100644 --- a/tests/sqlparser_postgres.rs 
+++ b/tests/sqlparser_postgres.rs @@ -2235,9 +2235,9 @@ fn test_json() { let sql = "SELECT params ->> 'name' FROM events"; let select = pg().verified_only_select(sql); assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { + SelectItem::UnnamedExpr(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("params"))), - operator: JsonOperator::LongArrow, + op: BinaryOperator::LongArrow, right: Box::new(Expr::Value(Value::SingleQuotedString("name".to_string()))), }), select.projection[0] @@ -2246,9 +2246,9 @@ fn test_json() { let sql = "SELECT params -> 'name' FROM events"; let select = pg().verified_only_select(sql); assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { + SelectItem::UnnamedExpr(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("params"))), - operator: JsonOperator::Arrow, + op: BinaryOperator::Arrow, right: Box::new(Expr::Value(Value::SingleQuotedString("name".to_string()))), }), select.projection[0] @@ -2257,15 +2257,55 @@ fn test_json() { let sql = "SELECT info -> 'items' ->> 'product' FROM orders"; let select = pg().verified_only_select(sql); assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { - left: Box::new(Expr::Identifier(Ident::new("info"))), - operator: JsonOperator::Arrow, - right: Box::new(Expr::JsonAccess { - left: Box::new(Expr::Value(Value::SingleQuotedString("items".to_string()))), - operator: JsonOperator::LongArrow, - right: Box::new(Expr::Value(Value::SingleQuotedString( - "product".to_string() - ))) + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("info"))), + op: BinaryOperator::Arrow, + right: Box::new(Expr::Value(Value::SingleQuotedString("items".to_string()))) + }), + op: BinaryOperator::LongArrow, + right: Box::new(Expr::Value(Value::SingleQuotedString( + "product".to_string() + ))), + }), + select.projection[0] + ); + + // the RHS can be a number (array element access) + let sql = "SELECT obj -> 42"; + let select = 
pg().verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("obj"))), + op: BinaryOperator::Arrow, + right: Box::new(Expr::Value(number("42"))), + }), + select.projection[0] + ); + + // the RHS can be an identifier + let sql = "SELECT obj -> key"; + let select = pg().verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("obj"))), + op: BinaryOperator::Arrow, + right: Box::new(Expr::Identifier(Ident::new("key"))), + }), + select.projection[0] + ); + + // -> operator has lower precedence than arithmetic ops + let sql = "SELECT obj -> 3 * 2"; + let select = pg().verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("obj"))), + op: BinaryOperator::Arrow, + right: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Value(number("3"))), + op: BinaryOperator::Multiply, + right: Box::new(Expr::Value(number("2"))), }), }), select.projection[0] @@ -2274,9 +2314,9 @@ fn test_json() { let sql = "SELECT info #> '{a,b,c}' FROM orders"; let select = pg().verified_only_select(sql); assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { + SelectItem::UnnamedExpr(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("info"))), - operator: JsonOperator::HashArrow, + op: BinaryOperator::HashArrow, right: Box::new(Expr::Value(Value::SingleQuotedString( "{a,b,c}".to_string() ))), @@ -2287,9 +2327,9 @@ fn test_json() { let sql = "SELECT info #>> '{a,b,c}' FROM orders"; let select = pg().verified_only_select(sql); assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { + SelectItem::UnnamedExpr(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("info"))), - operator: JsonOperator::HashLongArrow, + op: BinaryOperator::HashLongArrow, right: Box::new(Expr::Value(Value::SingleQuotedString( "{a,b,c}".to_string() ))), @@ -2300,9 +2340,9 @@ fn test_json() { let 
sql = "SELECT info FROM orders WHERE info @> '{\"a\": 1}'"; let select = pg().verified_only_select(sql); assert_eq!( - Expr::JsonAccess { + Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("info"))), - operator: JsonOperator::AtArrow, + op: BinaryOperator::AtArrow, right: Box::new(Expr::Value(Value::SingleQuotedString( "{\"a\": 1}".to_string() ))), @@ -2313,11 +2353,11 @@ fn test_json() { let sql = "SELECT info FROM orders WHERE '{\"a\": 1}' <@ info"; let select = pg().verified_only_select(sql); assert_eq!( - Expr::JsonAccess { + Expr::BinaryOp { left: Box::new(Expr::Value(Value::SingleQuotedString( "{\"a\": 1}".to_string() ))), - operator: JsonOperator::ArrowAt, + op: BinaryOperator::ArrowAt, right: Box::new(Expr::Identifier(Ident::new("info"))), }, select.selection.unwrap(), @@ -2326,9 +2366,9 @@ fn test_json() { let sql = "SELECT info #- ARRAY['a', 'b'] FROM orders"; let select = pg().verified_only_select(sql); assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { + SelectItem::UnnamedExpr(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::from("info"))), - operator: JsonOperator::HashMinus, + op: BinaryOperator::HashMinus, right: Box::new(Expr::Array(Array { elem: vec![ Expr::Value(Value::SingleQuotedString("a".to_string())), @@ -2343,9 +2383,9 @@ fn test_json() { let sql = "SELECT info FROM orders WHERE info @? 
'$.a'"; let select = pg().verified_only_select(sql); assert_eq!( - Expr::JsonAccess { + Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::from("info"))), - operator: JsonOperator::AtQuestion, + op: BinaryOperator::AtQuestion, right: Box::new(Expr::Value(Value::SingleQuotedString("$.a".to_string())),), }, select.selection.unwrap(), @@ -2354,9 +2394,9 @@ fn test_json() { let sql = "SELECT info FROM orders WHERE info @@ '$.a'"; let select = pg().verified_only_select(sql); assert_eq!( - Expr::JsonAccess { + Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::from("info"))), - operator: JsonOperator::AtAt, + op: BinaryOperator::AtAt, right: Box::new(Expr::Value(Value::SingleQuotedString("$.a".to_string())),), }, select.selection.unwrap(), diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 469e6739f..59630814a 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -183,71 +183,167 @@ fn parse_lateral_flatten() { snowflake().verified_only_select(r#"SELECT emp.employee_ID, emp.last_name, index, value AS project_name FROM employees AS emp, LATERAL FLATTEN(INPUT => emp.project_names) AS proj_names"#); } +// https://docs.snowflake.com/en/user-guide/querying-semistructured #[test] -fn parse_json_using_colon() { +fn parse_semi_structured_data_traversal() { + // most basic case let sql = "SELECT a:b FROM t"; let select = snowflake().verified_only_select(sql); assert_eq!( SelectItem::UnnamedExpr(Expr::JsonAccess { - left: Box::new(Expr::Identifier(Ident::new("a"))), - operator: JsonOperator::Colon, - right: Box::new(Expr::Value(Value::UnQuotedString("b".to_string()))), + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "b".to_owned(), + quoted: false + }] + }, }), select.projection[0] ); - let sql = "SELECT a:type FROM t"; + // identifier can be quoted + let sql = r#"SELECT a:"my long object key name" FROM t"#; let select = 
snowflake().verified_only_select(sql); assert_eq!( SelectItem::UnnamedExpr(Expr::JsonAccess { - left: Box::new(Expr::Identifier(Ident::new("a"))), - operator: JsonOperator::Colon, - right: Box::new(Expr::Value(Value::UnQuotedString("type".to_string()))), + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "my long object key name".to_owned(), + quoted: true + }] + }, }), select.projection[0] ); - let sql = "SELECT a:location FROM t"; + // expressions are allowed in bracket notation + let sql = r#"SELECT a[2 + 2] FROM t"#; let select = snowflake().verified_only_select(sql); assert_eq!( SelectItem::UnnamedExpr(Expr::JsonAccess { - left: Box::new(Expr::Identifier(Ident::new("a"))), - operator: JsonOperator::Colon, - right: Box::new(Expr::Value(Value::UnQuotedString("location".to_string()))), - }), - select.projection[0] - ); - - let sql = "SELECT a:date FROM t"; - let select = snowflake().verified_only_select(sql); - assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { - left: Box::new(Expr::Identifier(Ident::new("a"))), - operator: JsonOperator::Colon, - right: Box::new(Expr::Value(Value::UnQuotedString("date".to_string()))), + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Bracket { + key: Expr::BinaryOp { + left: Box::new(Expr::Value(number("2"))), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("2"))) + }, + }] + }, }), select.projection[0] ); snowflake().verified_stmt("SELECT a:b::INT FROM t"); - let sql = "SELECT a:start, a:end FROM t"; + // unquoted keywords are permitted in the object key + let sql = "SELECT a:select, a:from FROM t"; let select = snowflake().verified_only_select(sql); assert_eq!( vec![ SelectItem::UnnamedExpr(Expr::JsonAccess { - left: Box::new(Expr::Identifier(Ident::new("a"))), - operator: JsonOperator::Colon, - right: Box::new(Expr::Value(Value::UnQuotedString("start".to_string()))), + value: 
Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "select".to_owned(), + quoted: false + }] + }, }), SelectItem::UnnamedExpr(Expr::JsonAccess { - left: Box::new(Expr::Identifier(Ident::new("a"))), - operator: JsonOperator::Colon, - right: Box::new(Expr::Value(Value::UnQuotedString("end".to_string()))), + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "from".to_owned(), + quoted: false + }] + }, }) ], select.projection ); + + // multiple levels can be traversed + // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation + let sql = r#"SELECT a:foo."bar".baz"#; + let select = snowflake().verified_only_select(sql); + assert_eq!( + vec![SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![ + JsonPathElem::Dot { + key: "foo".to_owned(), + quoted: false, + }, + JsonPathElem::Dot { + key: "bar".to_owned(), + quoted: true, + }, + JsonPathElem::Dot { + key: "baz".to_owned(), + quoted: false, + } + ] + }, + })], + select.projection + ); + + // dot and bracket notation can be mixed (starting with : case) + // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation + let sql = r#"SELECT a:foo[0].bar"#; + let select = snowflake().verified_only_select(sql); + assert_eq!( + vec![SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![ + JsonPathElem::Dot { + key: "foo".to_owned(), + quoted: false, + }, + JsonPathElem::Bracket { + key: Expr::Value(number("0")), + }, + JsonPathElem::Dot { + key: "bar".to_owned(), + quoted: false, + } + ] + }, + })], + select.projection + ); + + // dot and bracket notation can be mixed (starting with bracket case) + // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation + let sql = r#"SELECT a[0].foo.bar"#; + let 
select = snowflake().verified_only_select(sql); + assert_eq!( + vec![SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![ + JsonPathElem::Bracket { + key: Expr::Value(number("0")), + }, + JsonPathElem::Dot { + key: "foo".to_owned(), + quoted: false, + }, + JsonPathElem::Dot { + key: "bar".to_owned(), + quoted: false, + } + ] + }, + })], + select.projection + ); } #[test]