Skip to content

Improve parsing of JSON accesses on Postgres and Snowflake #1215

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Apr 30, 2024
141 changes: 66 additions & 75 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ pub use self::query::{
ValueTableMode, Values, WildcardAdditionalOptions, With,
};
pub use self::value::{
escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value,
escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString,
TrimWhereField, Value,
};

use crate::ast::helpers::stmt_data_loading::{
Expand Down Expand Up @@ -266,66 +267,6 @@ impl fmt::Display for Interval {
}
}

/// An operator used in a JSON access expression.
///
/// Each variant's doc comment starts with the operator's textual form,
/// which is also what its `Display` implementation writes.
///
/// Note: the derived `PartialOrd`/`Ord` follow declaration order, so
/// reordering variants changes comparison results.
#[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum JsonOperator {
/// `->`: keeps the value as json.
Arrow,
/// `->>`: keeps the value as text or int.
LongArrow,
/// `#>`: extracts the JSON sub-object at the specified path.
HashArrow,
/// `#>>`: extracts the JSON sub-object at the specified path as text.
HashLongArrow,
/// `:`: colon is used by Snowflake (which is similar to `LongArrow`).
Colon,
/// `jsonb @> jsonb -> boolean`: tests whether the left json contains the
/// right json.
AtArrow,
/// `jsonb <@ jsonb -> boolean`: tests whether the right json contains the
/// left json.
ArrowAt,
/// `jsonb #- text[] -> jsonb`: deletes the field or array element at the
/// specified path, where path elements can be either field keys or array
/// indexes.
HashMinus,
/// `jsonb @? jsonpath -> boolean`: does the JSON path return any item for
/// the specified JSON value?
AtQuestion,
/// `jsonb @@ jsonpath -> boolean`: returns the result of a JSON path
/// predicate check for the specified JSON value. Only the first item of
/// the result is taken into account. If the result is not Boolean, then
/// NULL is returned.
AtAt,
}

impl fmt::Display for JsonOperator {
    /// Writes the operator's textual token (for example `->>` for
    /// `JsonOperator::LongArrow`) with no surrounding whitespace.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Resolve the token first so there is a single write call.
        let token = match self {
            JsonOperator::Arrow => "->",
            JsonOperator::LongArrow => "->>",
            JsonOperator::HashArrow => "#>",
            JsonOperator::HashLongArrow => "#>>",
            JsonOperator::Colon => ":",
            JsonOperator::AtArrow => "@>",
            JsonOperator::ArrowAt => "<@",
            JsonOperator::HashMinus => "#-",
            JsonOperator::AtQuestion => "@?",
            JsonOperator::AtAt => "@@",
        };
        f.write_str(token)
    }
}

/// A field definition within a struct.
///
/// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type
Expand Down Expand Up @@ -408,6 +349,59 @@ impl fmt::Display for MapAccessKey {
}
}

/// An element of a JSON path.
///
/// A [`JsonPath`] is an ordered sequence of these elements.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum JsonPathElem {
/// Accesses an object field using dot notation, e.g. `obj:foo.bar.baz`.
///
/// `key` is the field name. When `quoted` is `true`, the key is displayed
/// wrapped in double quotes after being passed through
/// `escape_double_quote_string`; otherwise it is displayed verbatim.
///
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation>.
Dot { key: String, quoted: bool },
/// Accesses an object field or array element using bracket notation,
/// e.g. `obj['foo']`.
///
/// `key` is the expression displayed between the square brackets.
///
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured#bracket-notation>.
Bracket { key: Expr },
}

/// A JSON path: an ordered list of [`JsonPathElem`]s.
///
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured>.
/// See <https://docs.databricks.com/en/sql/language-manual/sql-ref-json-path-expression.html>.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct JsonPath {
/// The path elements, in the order in which they are displayed.
pub path: Vec<JsonPathElem>,
}

impl fmt::Display for JsonPath {
    /// Writes every path element in order.
    ///
    /// A dot element is introduced by `:` when it is the very first element
    /// of the path and by `.` otherwise; a bracket element is written as
    /// `[key]` with no separator.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for (position, segment) in self.path.iter().enumerate() {
            match segment {
                JsonPathElem::Bracket { key } => {
                    write!(f, "[{key}]")?;
                }
                JsonPathElem::Dot { key, quoted } => {
                    // Only the leading element of the whole path uses a colon.
                    let separator = if position == 0 { ":" } else { "." };
                    f.write_str(separator)?;

                    if *quoted {
                        write!(f, "\"{}\"", escape_double_quote_string(key))?;
                    } else {
                        write!(f, "{key}")?;
                    }
                }
            }
        }
        Ok(())
    }
}

/// An SQL expression of any type.
///
/// The parser does not distinguish between expressions of different types
Expand All @@ -425,11 +419,16 @@ pub enum Expr {
Identifier(Ident),
/// Multi-part identifier, e.g. `table_alias.column` or `schema.table.col`
CompoundIdentifier(Vec<Ident>),
/// JSON access (postgres) eg: data->'tags'
/// Access data nested in a value containing semi-structured data, such as
/// the `VARIANT` type on Snowflake, for example `src:customer[0].name`.
///
/// See <https://docs.snowflake.com/en/user-guide/querying-semistructured>.
/// See <https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html>.
JsonAccess {
left: Box<Expr>,
operator: JsonOperator,
right: Box<Expr>,
/// The value being queried.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This makes much more sense to me

value: Box<Expr>,
/// The path to the data to extract.
path: JsonPath,
},
/// CompositeAccess (postgres) eg: SELECT (information_schema._pg_expandarray(array['i','i'])).n
CompositeAccess {
Expand Down Expand Up @@ -1210,16 +1209,8 @@ impl fmt::Display for Expr {
Expr::Array(set) => {
write!(f, "{set}")
}
Expr::JsonAccess {
left,
operator,
right,
} => {
if operator == &JsonOperator::Colon {
write!(f, "{left}{operator}{right}")
} else {
write!(f, "{left} {operator} {right}")
}
Expr::JsonAccess { value, path } => {
write!(f, "{value}{path}")
}
Expr::CompositeAccess { expr, key } => {
write!(f, "{expr}.{key}")
Expand Down
82 changes: 82 additions & 0 deletions src/ast/operator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,79 @@ pub enum BinaryOperator {
PGNotILikeMatch,
/// String "starts with", eg: `a ^@ b` (PostgreSQL-specific)
PGStartsWith,
/// The `->` operator.
///
/// On PostgreSQL, this operator extracts a JSON object field or array
/// element, for example `'{"a":"b"}'::json -> 'a'` or `'[1, 2, 3]'::json
/// -> 2`.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
Arrow,
/// The `->>` operator.
///
/// On PostgreSQL, this operator extracts a JSON object field or JSON
/// array element and converts it to text, for example `'{"a":"b"}'::json
/// ->> 'a'` or `'[1, 2, 3]'::json ->> 2`.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
LongArrow,
/// The `#>` operator.
///
/// On PostgreSQL, this operator extracts a JSON sub-object at the specified
/// path, for example:
///
/// ```notrust
///'{"a": {"b": ["foo","bar"]}}'::json #> '{a,b,1}'
/// ```
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
HashArrow,
/// The `#>>` operator.
///
/// A PostgreSQL-specific operator that extracts a JSON sub-object at the
/// specified path as text, for example:
///
/// ```notrust
///'{"a": {"b": ["foo","bar"]}}'::json #>> '{a,b,1}'
/// ```
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
HashLongArrow,
/// The `@@` operator.
///
/// On PostgreSQL, this is used for JSON and text searches.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
/// See <https://www.postgresql.org/docs/current/functions-textsearch.html>.
AtAt,
/// The `@>` operator.
///
/// On PostgreSQL, this is used for JSON and text searches.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
/// See <https://www.postgresql.org/docs/current/functions-textsearch.html>.
AtArrow,
/// The `<@` operator.
///
/// On PostgreSQL, this is used for JSON and text searches.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
/// See <https://www.postgresql.org/docs/current/functions-textsearch.html>.
ArrowAt,
/// The `#-` operator.
///
/// On PostgreSQL, this operator is used to delete a field or array element
/// at a specified path.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
HashMinus,
/// The `@?` operator.
///
/// On PostgreSQL, this operator is used to check the given JSON path
/// returns an item for the JSON value.
///
/// See <https://www.postgresql.org/docs/current/functions-json.html>.
AtQuestion,
/// PostgreSQL-specific custom operator.
///
/// See [CREATE OPERATOR](https://www.postgresql.org/docs/current/sql-createoperator.html)
Expand Down Expand Up @@ -187,6 +260,15 @@ impl fmt::Display for BinaryOperator {
BinaryOperator::PGNotLikeMatch => f.write_str("!~~"),
BinaryOperator::PGNotILikeMatch => f.write_str("!~~*"),
BinaryOperator::PGStartsWith => f.write_str("^@"),
BinaryOperator::Arrow => f.write_str("->"),
BinaryOperator::LongArrow => f.write_str("->>"),
BinaryOperator::HashArrow => f.write_str("#>"),
BinaryOperator::HashLongArrow => f.write_str("#>>"),
BinaryOperator::AtAt => f.write_str("@@"),
BinaryOperator::AtArrow => f.write_str("@>"),
BinaryOperator::ArrowAt => f.write_str("<@"),
BinaryOperator::HashMinus => f.write_str("#-"),
BinaryOperator::AtQuestion => f.write_str("@?"),
BinaryOperator::PGCustomBinaryOperator(idents) => {
write!(f, "OPERATOR({})", display_separated(idents, "."))
}
Expand Down
3 changes: 0 additions & 3 deletions src/ast/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ pub enum Value {
Null,
/// `?` or `$` Prepared statement arg placeholder
Placeholder(String),
/// Add support of snowflake field:key - key should be a value
UnQuotedString(String),
}

impl fmt::Display for Value {
Expand All @@ -85,7 +83,6 @@ impl fmt::Display for Value {
Value::RawStringLiteral(v) => write!(f, "R'{v}'"),
Value::Null => write!(f, "NULL"),
Value::Placeholder(v) => write!(f, "{v}"),
Value::UnQuotedString(v) => write!(f, "{v}"),
}
}
}
Expand Down
Loading
Loading