From e0ff186de7eab947b0ea71a3630de6cb09949f47 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Fri, 21 Feb 2025 00:39:07 +0000 Subject: [PATCH 1/2] fix: remove unbox json functionality from JSONArrowType --- db_dtypes/__init__.py | 3 +- db_dtypes/json.py | 10 ------ tests/unit/test_json.py | 69 ++++++++++++++++------------------------- 3 files changed, 28 insertions(+), 54 deletions(-) diff --git a/db_dtypes/__init__.py b/db_dtypes/__init__.py index d5b05dc..2424ff4 100644 --- a/db_dtypes/__init__.py +++ b/db_dtypes/__init__.py @@ -50,7 +50,7 @@ # To use JSONArray and JSONDtype, you'll need Pandas 1.5.0 or later. With the removal # of Python 3.7 compatibility, the minimum Pandas version will be updated to 1.5.0. if packaging.version.Version(pandas.__version__) >= packaging.version.Version("1.5.0"): - from db_dtypes.json import JSONArray, JSONArrowScalar, JSONArrowType, JSONDtype + from db_dtypes.json import JSONArray, JSONArrowType, JSONDtype else: JSONArray = None JSONDtype = None @@ -375,7 +375,6 @@ def __sub__(self, other): "JSONDtype", "JSONArray", "JSONArrowType", - "JSONArrowScalar", "TimeArray", "TimeDtype", ] diff --git a/db_dtypes/json.py b/db_dtypes/json.py index 835d638..37aad83 100644 --- a/db_dtypes/json.py +++ b/db_dtypes/json.py @@ -256,13 +256,6 @@ def __array__(self, dtype=None, copy: bool | None = None) -> np.ndarray: return result -class JSONArrowScalar(pa.ExtensionScalar): - def as_py(self, **kwargs): - return JSONArray._deserialize_json( - self.value.as_py(**kwargs) if self.value else None - ) - - class JSONArrowType(pa.ExtensionType): """Arrow extension type for the `dbjson` Pandas extension type.""" @@ -282,9 +275,6 @@ def __hash__(self) -> int: def to_pandas_dtype(self): return JSONDtype() - def __arrow_ext_scalar_class__(self): - return JSONArrowScalar - # Register the type to be included in RecordBatches, sent over IPC and received in # another Python process. diff --git a/tests/unit/test_json.py b/tests/unit/test_json.py index ff2c867..503c53c 100644 --- a/tests/unit/test_json.py +++ b/tests/unit/test_json.py @@ -160,20 +160,15 @@ def test_json_arrow_to_pandas(): s = arr.to_pandas() assert isinstance(s.dtypes, db_dtypes.JSONDtype) assert s[0] - assert s[1] == 100 - assert math.isclose(s[2], 0.98) - assert s[3] == "hello world" - assert math.isclose(s[4][0], 0.1) - assert math.isclose(s[4][1], 0.2) - assert s[5] == { - "null_field": None, - "order": { - "items": ["book", "pen", "computer"], - "total": 15, - "address": {"street": "123 Main St", "city": "Anytown"}, - }, - } - assert pd.isna(s[6]) + assert s[1] == "100" + assert s[2] == "0.98" + assert s[3] == '"hello world"' + assert s[4] == "[0.1,0.2]" + assert ( + s[5] + == '{"null_field":null,"order":{"address":{"city":"Anytown","street":"123 Main St"},"items":["book","pen","computer"],"total":15}}' + ) + assert s[6] == "null" def test_json_arrow_to_pylist(): @@ -186,20 +181,15 @@ def test_json_arrow_to_pylist(): s = arr.to_pylist() assert isinstance(s, list) assert s[0] - assert s[1] == 100 - assert math.isclose(s[2], 0.98) - assert s[3] == "hello world" - assert math.isclose(s[4][0], 0.1) - assert math.isclose(s[4][1], 0.2) - assert s[5] == { - "null_field": None, - "order": { - "items": ["book", "pen", "computer"], - "total": 15, - "address": {"street": "123 Main St", "city": "Anytown"}, - }, - } - assert s[6] is None + assert s[1] == "100" + assert s[2] == "0.98" + assert s[3] == '"hello world"' + assert s[4] == "[0.1,0.2]" + assert ( + s[5] + == '{"null_field":null,"order":{"address":{"city":"Anytown","street":"123 Main St"},"items":["book","pen","computer"],"total":15}}' + ) + assert s[6] == "null" def test_json_arrow_record_batch(): @@ -226,17 +216,12 @@ def test_json_arrow_record_batch(): assert isinstance(s, list) assert s[0] - assert s[1] == 100 - assert math.isclose(s[2], 0.98) - assert s[3] == "hello world" - assert math.isclose(s[4][0], 0.1) - assert math.isclose(s[4][1], 0.2) - assert s[5] == { - "null_field": None, - "order": { - "items": ["book", "pen", "computer"], - "total": 15, - "address": {"street": "123 Main St", "city": "Anytown"}, - }, - } - assert s[6] is None + assert s[1] == "100" + assert s[2] == "0.98" + assert s[3] == '"hello world"' + assert s[4] == "[0.1,0.2]" + assert ( + s[5] + == '{"null_field":null,"order":{"address":{"city":"Anytown","street":"123 Main St"},"items":["book","pen","computer"],"total":15}}' + ) + assert s[6] == "null" From 95bd56bced0f41058b2e7f6d46e4d0d4fc0bd849 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 4 Mar 2025 22:55:11 +0000 Subject: [PATCH 2/2] lint --- tests/unit/test_json.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit/test_json.py b/tests/unit/test_json.py index 503c53c..d15cfc7 100644 --- a/tests/unit/test_json.py +++ b/tests/unit/test_json.py @@ -13,7 +13,6 @@ # limitations under the License. import json -import math import numpy as np import pandas as pd