Skip to content

Commit ec81dd2

Browse files
authored
feat: add bbq.json_query and warn bbq.json_extract deprecated (#1756)
1 parent ad5b98c commit ec81dd2

File tree

6 files changed

+108
-5
lines changed

6 files changed

+108
-5
lines changed

bigframes/bigquery/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
json_extract,
3838
json_extract_array,
3939
json_extract_string_array,
40+
json_query,
4041
json_set,
4142
json_value,
4243
parse_json,
@@ -58,10 +59,11 @@
5859
"st_distance",
5960
"st_intersection",
6061
# json ops
61-
"json_set",
6262
"json_extract",
6363
"json_extract_array",
6464
"json_extract_string_array",
65+
"json_query",
66+
"json_set",
6567
"json_value",
6668
"parse_json",
6769
# search ops

bigframes/bigquery/_operations/json.py

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,11 @@
2222
from __future__ import annotations
2323

2424
from typing import Any, cast, Optional, Sequence, Tuple, Union
25+
import warnings
2526

2627
import bigframes.core.utils as utils
2728
import bigframes.dtypes
29+
import bigframes.exceptions as bfe
2830
import bigframes.operations as ops
2931
import bigframes.series as series
3032

@@ -87,9 +89,13 @@ def json_extract(
8789
input: series.Series,
8890
json_path: str,
8991
) -> series.Series:
90-
"""Extracts a JSON value and converts it to a SQL JSON-formatted `STRING` or `JSON`
91-
value. This function uses single quotes and brackets to escape invalid JSONPath
92-
characters in JSON keys.
92+
"""Extracts a JSON value and converts it to a SQL JSON-formatted ``STRING`` or
93+
``JSON`` value. This function uses single quotes and brackets to escape invalid
94+
JSONPath characters in JSON keys.
95+
96+
.. deprecated:: 2.5.0
97+
The ``json_extract`` is deprecated and will be removed in a future version.
98+
Use ``json_query`` instead.
9399
94100
**Examples:**
95101
@@ -111,6 +117,11 @@ def json_extract(
111117
Returns:
112118
bigframes.series.Series: A new Series with the JSON or JSON-formatted STRING.
113119
"""
120+
msg = (
121+
"The `json_extract` is deprecated and will be removed in a future version. "
122+
"Use `json_query` instead."
123+
)
124+
warnings.warn(bfe.format_message(msg), category=UserWarning)
114125
return input._apply_unary_op(ops.JSONExtract(json_path=json_path))
115126

116127

@@ -231,6 +242,37 @@ def json_extract_string_array(
231242
return array_series
232243

233244

245+
def json_query(
    input: series.Series,
    json_path: str,
) -> series.Series:
    """Extracts the JSON value found at ``json_path`` and converts it to a SQL
    JSON-formatted ``STRING`` or ``JSON`` value. Invalid JSONPath characters in
    JSON keys are escaped with double quotes. For example: ``"a.b"``.

    **Examples:**

        >>> import bigframes.pandas as bpd
        >>> import bigframes.bigquery as bbq
        >>> bpd.options.display.progress_bar = None

        >>> s = bpd.Series(['{"class": {"students": [{"id": 5}, {"id": 12}]}}'])
        >>> bbq.json_query(s, json_path="$.class")
        0    {"students":[{"id":5},{"id":12}]}
        dtype: string

    Args:
        input (bigframes.series.Series):
            The Series holding the JSON data, either as native JSON objects or
            as JSON-formatted strings.
        json_path (str):
            The JSON path that selects the data to obtain from the input.

    Returns:
        bigframes.series.Series: A new Series with the JSON or JSON-formatted STRING.
    """
    # Build the op first, then hand it to the Series for element-wise application.
    query_op = ops.JSONQuery(json_path=json_path)
    return input._apply_unary_op(query_op)
274+
275+
234276
def json_value(
235277
input: series.Series,
236278
json_path: str,

bigframes/core/compile/scalar_op_compiler.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1356,6 +1356,19 @@ def json_extract_string_array_op_impl(
13561356
return json_extract_string_array(json_obj=x, json_path=op.json_path)
13571357

13581358

1359+
@scalar_op_compiler.register_unary_op(ops.JSONQuery, pass_op=True)
def json_query_op_impl(x: ibis_types.Value, op: ops.JSONQuery):
    # Stub for the builtin scalar function. It deliberately has no return
    # annotation here: the annotation is filled in below from the input
    # expression, so the result type follows the type of ``x``.
    def json_query(json_or_json_string, json_path: ibis_dtypes.str):  # type: ignore
        """Extracts a JSON value and converts it to a SQL JSON-formatted STRING or JSON value."""
        ...

    json_query.__annotations__["return"] = x.type()
    builtin_json_query = ibis_udf.scalar.builtin(json_query)
    return builtin_json_query(json_or_json_string=x, json_path=op.json_path)
1370+
1371+
13591372
@scalar_op_compiler.register_unary_op(ops.ParseJSON, pass_op=True)
13601373
def parse_json_op_impl(x: ibis_types.Value, op: ops.ParseJSON):
13611374
return parse_json(json_str=x)

bigframes/operations/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@
106106
JSONExtract,
107107
JSONExtractArray,
108108
JSONExtractStringArray,
109+
JSONQuery,
109110
JSONSet,
110111
JSONValue,
111112
ParseJSON,
@@ -355,6 +356,7 @@
355356
"JSONExtract",
356357
"JSONExtractArray",
357358
"JSONExtractStringArray",
359+
"JSONQuery",
358360
"JSONSet",
359361
"JSONValue",
360362
"ParseJSON",

bigframes/operations/json_ops.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,18 @@ def output_type(self, *input_types):
134134
+ f" Received type: {input_type}"
135135
)
136136
return dtypes.STRING_DTYPE
137+
138+
139+
@dataclasses.dataclass(frozen=True)
class JSONQuery(base_ops.UnaryOp):
    """Unary op named ``json_query`` that carries the JSON path to query."""

    name: typing.ClassVar[str] = "json_query"
    json_path: str

    def output_type(self, *input_types):
        """Return the first input type unchanged.

        Raises:
            TypeError: If the first input type is not JSON-like according to
                ``dtypes.is_json_like``.
        """
        input_type = input_types[0]
        # Happy path first: a JSON-like input keeps its own dtype.
        if dtypes.is_json_like(input_type):
            return input_type
        raise TypeError(
            "Input type must be a valid JSON object or JSON-formatted string type."
            + f" Received type: {input_type}"
        )

tests/system/small/bigquery/test_json.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,8 @@ def test_json_extract_from_json():
9999
['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}'],
100100
dtype=dtypes.JSON_DTYPE,
101101
)
102-
actual = bbq.json_extract(s, "$.a.b")
102+
with pytest.warns(UserWarning, match="The `json_extract` is deprecated"):
103+
actual = bbq.json_extract(s, "$.a.b")
103104
expected = bpd.Series(["[1, 2]", None, "0"], dtype=dtypes.JSON_DTYPE)
104105

105106
pd.testing.assert_series_equal(actual.to_pandas(), expected.to_pandas())
@@ -212,6 +213,34 @@ def test_json_extract_string_array_w_invalid_series_type():
212213
bbq.json_extract_string_array(s)
213214

214215

216+
def test_json_query_from_json():
    # JSON-typed input: the expected result is built with the same JSON dtype.
    json_docs = ['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}']
    source = bpd.Series(json_docs, dtype=dtypes.JSON_DTYPE)

    result = bbq.json_query(source, "$.a.b")

    want = bpd.Series(["[1, 2]", None, "0"], dtype=dtypes.JSON_DTYPE)
    pd.testing.assert_series_equal(result.to_pandas(), want.to_pandas())
225+
226+
227+
def test_json_query_from_string():
    # String-typed input: the expected result is a pyarrow-backed string
    # Series, with the array rendered compactly as "[1,2]".
    string_dtype = pd.StringDtype(storage="pyarrow")
    json_docs = ['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}']
    source = bpd.Series(json_docs, dtype=string_dtype)

    result = bbq.json_query(source, "$.a.b")

    want = bpd.Series(["[1,2]", None, "0"], dtype=string_dtype)
    pd.testing.assert_series_equal(result.to_pandas(), want.to_pandas())
236+
237+
238+
def test_json_query_w_invalid_series_type():
    # A Series of integers is expected to be rejected with a TypeError.
    non_json = bpd.Series([1, 2])
    with pytest.raises(TypeError):
        bbq.json_query(non_json, "$.a")
242+
243+
215244
def test_json_value_from_json():
216245
s = bpd.Series(
217246
['{"a": {"b": [1, 2]}}', '{"a": {"c": 1}}', '{"a": {"b": 0}}'],

0 commit comments

Comments
 (0)