Skip to content

Commit d1d3d22

Browse files
jmhainiffyio
authored andcommitted
Improve parsing of JSON accesses on Postgres and Snowflake (apache#1215)
Co-authored-by: Ifeanyi Ubah <[email protected]>
1 parent 7e6cfac commit d1d3d22

File tree

7 files changed

+432
-199
lines changed

7 files changed

+432
-199
lines changed

src/ast/mod.rs

+66-75
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,8 @@ pub use self::query::{
5151
Top, TopQuantity, ValueTableMode, Values, WildcardAdditionalOptions, With,
5252
};
5353
pub use self::value::{
54-
escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value,
54+
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
55+
TrimWhereField, Value,
5556
};
5657

5758
use crate::ast::helpers::stmt_data_loading::{
@@ -270,66 +271,6 @@ impl fmt::Display for Interval {
270271
}
271272
}
272273

273-
/// JsonOperator
274-
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
275-
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
276-
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
277-
pub enum JsonOperator {
278-
/// -> keeps the value as json
279-
Arrow,
280-
/// ->> keeps the value as text or int.
281-
LongArrow,
282-
/// #> Extracts JSON sub-object at the specified path
283-
HashArrow,
284-
/// #>> Extracts JSON sub-object at the specified path as text
285-
HashLongArrow,
286-
/// : Colon is used by Snowflake (Which is similar to LongArrow)
287-
Colon,
288-
/// jsonb @> jsonb -> boolean: Test whether left json contains the right json
289-
AtArrow,
290-
/// jsonb <@ jsonb -> boolean: Test whether right json contains the left json
291-
ArrowAt,
292-
/// jsonb #- text[] -> jsonb: Deletes the field or array element at the specified
293-
/// path, where path elements can be either field keys or array indexes.
294-
HashMinus,
295-
/// jsonb @? jsonpath -> boolean: Does JSON path return any item for the specified
296-
/// JSON value?
297-
AtQuestion,
298-
/// jsonb @@ jsonpath → boolean: Returns the result of a JSON path predicate check
299-
/// for the specified JSON value. Only the first item of the result is taken into
300-
/// account. If the result is not Boolean, then NULL is returned.
301-
AtAt,
302-
}
303-
304-
impl fmt::Display for JsonOperator {
305-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
306-
match self {
307-
JsonOperator::Arrow => {
308-
write!(f, "->")
309-
}
310-
JsonOperator::LongArrow => {
311-
write!(f, "->>")
312-
}
313-
JsonOperator::HashArrow => {
314-
write!(f, "#>")
315-
}
316-
JsonOperator::HashLongArrow => {
317-
write!(f, "#>>")
318-
}
319-
JsonOperator::Colon => {
320-
write!(f, ":")
321-
}
322-
JsonOperator::AtArrow => {
323-
write!(f, "@>")
324-
}
325-
JsonOperator::ArrowAt => write!(f, "<@"),
326-
JsonOperator::HashMinus => write!(f, "#-"),
327-
JsonOperator::AtQuestion => write!(f, "@?"),
328-
JsonOperator::AtAt => write!(f, "@@"),
329-
}
330-
}
331-
}
332-
333274
/// A field definition within a struct.
334275
///
335276
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
@@ -412,6 +353,59 @@ impl fmt::Display for MapAccessKey {
412353
}
413354
}
414355

356+
/// An element of a JSON path.
357+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
358+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
359+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
360+
pub enum JsonPathElem {
361+
/// Accesses an object field using dot notation, e.g. `obj:foo.bar.baz`.
362+
///
363+
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation>.
364+
Dot { key: String, quoted: bool },
365+
/// Accesses an object field or array element using bracket notation,
366+
/// e.g. `obj['foo']`.
367+
///
368+
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#bracket-notation>.
369+
Bracket { key: Expr },
370+
}
371+
372+
/// A JSON path.
373+
///
374+
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured>.
375+
/// See <https://docs.databricks.com/en/sql/language-manual/sql-ref-json-path-expression.html>.
376+
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
377+
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
378+
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
379+
pub struct JsonPath {
380+
pub path: Vec<JsonPathElem>,
381+
}
382+
383+
impl fmt::Display for JsonPath {
384+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
385+
for (i, elem) in self.path.iter().enumerate() {
386+
match elem {
387+
JsonPathElem::Dot { key, quoted } => {
388+
if i == 0 {
389+
write!(f, ":")?;
390+
} else {
391+
write!(f, ".")?;
392+
}
393+
394+
if *quoted {
395+
write!(f, "\"{}\"", escape_double_quote_string(key))?;
396+
} else {
397+
write!(f, "{key}")?;
398+
}
399+
}
400+
JsonPathElem::Bracket { key } => {
401+
write!(f, "[{key}]")?;
402+
}
403+
}
404+
}
405+
Ok(())
406+
}
407+
}
408+
415409
/// The syntax used in a cast expression.
416410
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
417411
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -449,11 +443,16 @@ pub enum Expr {
449443
Identifier(Ident),
450444
/// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col`
451445
CompoundIdentifier(Vec<Ident>),
452-
/// JSON access (postgres) eg: data->'tags'
446+
/// Access data nested in a value containing semi-structured data, such as
447+
/// the `VARIANT` type on Snowflake, for example `src:customer[0].name`.
448+
///
449+
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured>.
450+
/// See <https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html>.
453451
JsonAccess {
454-
left: Box<Expr>,
455-
operator: JsonOperator,
456-
right: Box<Expr>,
452+
/// The value being queried.
453+
value: Box<Expr>,
454+
/// The path to the data to extract.
455+
path: JsonPath,
457456
},
458457
/// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n
459458
CompositeAccess {
@@ -1224,16 +1223,8 @@ impl fmt::Display for Expr {
12241223
Expr::Array(set) => {
12251224
write!(f, "{set}")
12261225
}
1227-
Expr::JsonAccess {
1228-
left,
1229-
operator,
1230-
right,
1231-
} => {
1232-
if operator == &JsonOperator::Colon {
1233-
write!(f, "{left}{operator}{right}")
1234-
} else {
1235-
write!(f, "{left} {operator} {right}")
1236-
}
1226+
Expr::JsonAccess { value, path } => {
1227+
write!(f, "{value}{path}")
12371228
}
12381229
Expr::CompositeAccess { expr, key } => {
12391230
write!(f, "{expr}.{key}")

src/ast/operator.rs

+82
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,79 @@ pub enum BinaryOperator {
141141
PGNotILikeMatch,
142142
/// String "starts with", eg: `a ^@ b` (PostgreSQL-specific)
143143
PGStartsWith,
144+
/// The `->` operator.
145+
///
146+
/// On PostgreSQL, this operator extracts a JSON object field or array
147+
/// element, for example `'{"a":"b"}'::json -> 'a'` or `'[1, 2, 3]'::json
148+
/// -> 2`.
149+
///
150+
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
151+
Arrow,
152+
/// The `->>` operator.
153+
///
154+
/// On PostgreSQL, this operator extracts a JSON object field or JSON
155+
/// array element and converts it to text, for example `'{"a":"b"}'::json
156+
/// ->> 'a'` or `'[1, 2, 3]'::json ->> 2`.
157+
///
158+
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
159+
LongArrow,
160+
/// The `#>` operator.
161+
///
162+
/// On PostgreSQL, this operator extracts a JSON sub-object at the specified
163+
/// path, for example:
164+
///
165+
/// ```notrust
166+
///'{"a": {"b": ["foo","bar"]}}'::json #> '{a,b,1}'
167+
/// ```
168+
///
169+
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
170+
HashArrow,
171+
/// The `#>>` operator.
172+
///
173+
/// A PostgreSQL-specific operator that extracts JSON sub-object at the
174+
/// specified path as text, for example
175+
///
176+
/// ```notrust
177+
///'{"a": {"b": ["foo","bar"]}}'::json #>> '{a,b,1}'
178+
/// ```
179+
///
180+
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
181+
HashLongArrow,
182+
/// The `@@` operator.
183+
///
184+
/// On PostgreSQL, this is used for JSON and text searches.
185+
///
186+
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
187+
/// See <https://www.postgresql.org/docs/current/functions-textsearch.html>.
188+
AtAt,
189+
/// The `@>` operator.
190+
///
191+
/// On PostgreSQL, this tests whether the left operand contains the right
/// one, for example for JSON values and text search queries.
192+
///
193+
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
194+
/// See <https://www.postgresql.org/docs/current/functions-textsearch.html>.
195+
AtArrow,
196+
/// The `<@` operator.
197+
///
198+
/// On PostgreSQL, this tests whether the left operand is contained in the
/// right one, for example for JSON values and text search queries.
199+
///
200+
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
201+
/// See <https://www.postgresql.org/docs/current/functions-textsearch.html>.
202+
ArrowAt,
203+
/// The `#-` operator.
204+
///
205+
/// On PostgreSQL, this operator is used to delete a field or array element
206+
/// at a specified path.
207+
///
208+
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
209+
HashMinus,
210+
/// The `@?` operator.
211+
///
212+
/// On PostgreSQL, this operator is used to check whether the given JSON path
213+
/// returns an item for the JSON value.
214+
///
215+
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
216+
AtQuestion,
144217
/// PostgreSQL-specific custom operator.
145218
///
146219
/// See [CREATE OPERATOR](https://www.postgresql.org/docs/current/sql-createoperator.html)
@@ -187,6 +260,15 @@ impl fmt::Display for BinaryOperator {
187260
BinaryOperator::PGNotLikeMatch => f.write_str("!~~"),
188261
BinaryOperator::PGNotILikeMatch => f.write_str("!~~*"),
189262
BinaryOperator::PGStartsWith => f.write_str("^@"),
263+
BinaryOperator::Arrow => f.write_str("->"),
264+
BinaryOperator::LongArrow => f.write_str("->>"),
265+
BinaryOperator::HashArrow => f.write_str("#>"),
266+
BinaryOperator::HashLongArrow => f.write_str("#>>"),
267+
BinaryOperator::AtAt => f.write_str("@@"),
268+
BinaryOperator::AtArrow => f.write_str("@>"),
269+
BinaryOperator::ArrowAt => f.write_str("<@"),
270+
BinaryOperator::HashMinus => f.write_str("#-"),
271+
BinaryOperator::AtQuestion => f.write_str("@?"),
190272
BinaryOperator::PGCustomBinaryOperator(idents) => {
191273
write!(f, "OPERATOR({})", display_separated(idents, "."))
192274
}

src/ast/value.rs

-3
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,6 @@ pub enum Value {
6565
Null,
6666
/// `?` or `$` Prepared statement arg placeholder
6767
Placeholder(String),
68-
/// Add support of snowflake field:key - key should be a value
69-
UnQuotedString(String),
7068
}
7169

7270
impl fmt::Display for Value {
@@ -85,7 +83,6 @@ impl fmt::Display for Value {
8583
Value::RawStringLiteral(v) => write!(f, "R'{v}'"),
8684
Value::Null => write!(f, "NULL"),
8785
Value::Placeholder(v) => write!(f, "{v}"),
88-
Value::UnQuotedString(v) => write!(f, "{v}"),
8986
}
9087
}
9188
}

0 commit comments

Comments
 (0)