Skip to content

fix: remove unbox json functionality from JSONArrowType #325

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 commits merged on
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions db_dtypes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
# To use JSONArray and JSONDtype, you'll need Pandas 1.5.0 or later. With the removal
# of Python 3.7 compatibility, the minimum Pandas version will be updated to 1.5.0.
if packaging.version.Version(pandas.__version__) >= packaging.version.Version("1.5.0"):
from db_dtypes.json import JSONArray, JSONArrowScalar, JSONArrowType, JSONDtype
from db_dtypes.json import JSONArray, JSONArrowType, JSONDtype
else:
JSONArray = None
JSONDtype = None
Expand Down Expand Up @@ -375,7 +375,6 @@ def __sub__(self, other):
"JSONDtype",
"JSONArray",
"JSONArrowType",
"JSONArrowScalar",
"TimeArray",
"TimeDtype",
]
10 changes: 0 additions & 10 deletions db_dtypes/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,13 +256,6 @@ def __array__(self, dtype=None, copy: bool | None = None) -> np.ndarray:
return result


class JSONArrowScalar(pa.ExtensionScalar):
def as_py(self, **kwargs):
return JSONArray._deserialize_json(
self.value.as_py(**kwargs) if self.value else None
)


class JSONArrowType(pa.ExtensionType):
"""Arrow extension type for the `dbjson` Pandas extension type."""

Expand All @@ -282,9 +275,6 @@ def __hash__(self) -> int:
def to_pandas_dtype(self):
return JSONDtype()

def __arrow_ext_scalar_class__(self):
return JSONArrowScalar


# Register the type to be included in RecordBatches, sent over IPC and received in
# another Python process.
Expand Down
70 changes: 27 additions & 43 deletions tests/unit/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.

import json
import math

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -160,20 +159,15 @@ def test_json_arrow_to_pandas():
s = arr.to_pandas()
assert isinstance(s.dtypes, db_dtypes.JSONDtype)
assert s[0]
assert s[1] == 100
assert math.isclose(s[2], 0.98)
assert s[3] == "hello world"
assert math.isclose(s[4][0], 0.1)
assert math.isclose(s[4][1], 0.2)
assert s[5] == {
"null_field": None,
"order": {
"items": ["book", "pen", "computer"],
"total": 15,
"address": {"street": "123 Main St", "city": "Anytown"},
},
}
assert pd.isna(s[6])
assert s[1] == "100"
assert s[2] == "0.98"
assert s[3] == '"hello world"'
assert s[4] == "[0.1,0.2]"
assert (
s[5]
== '{"null_field":null,"order":{"address":{"city":"Anytown","street":"123 Main St"},"items":["book","pen","computer"],"total":15}}'
)
assert s[6] == "null"


def test_json_arrow_to_pylist():
Expand All @@ -186,20 +180,15 @@ def test_json_arrow_to_pylist():
s = arr.to_pylist()
assert isinstance(s, list)
assert s[0]
assert s[1] == 100
assert math.isclose(s[2], 0.98)
assert s[3] == "hello world"
assert math.isclose(s[4][0], 0.1)
assert math.isclose(s[4][1], 0.2)
assert s[5] == {
"null_field": None,
"order": {
"items": ["book", "pen", "computer"],
"total": 15,
"address": {"street": "123 Main St", "city": "Anytown"},
},
}
assert s[6] is None
assert s[1] == "100"
assert s[2] == "0.98"
assert s[3] == '"hello world"'
assert s[4] == "[0.1,0.2]"
assert (
s[5]
== '{"null_field":null,"order":{"address":{"city":"Anytown","street":"123 Main St"},"items":["book","pen","computer"],"total":15}}'
)
assert s[6] == "null"


def test_json_arrow_record_batch():
Expand All @@ -226,17 +215,12 @@ def test_json_arrow_record_batch():

assert isinstance(s, list)
assert s[0]
assert s[1] == 100
assert math.isclose(s[2], 0.98)
assert s[3] == "hello world"
assert math.isclose(s[4][0], 0.1)
assert math.isclose(s[4][1], 0.2)
assert s[5] == {
"null_field": None,
"order": {
"items": ["book", "pen", "computer"],
"total": 15,
"address": {"street": "123 Main St", "city": "Anytown"},
},
}
assert s[6] is None
assert s[1] == "100"
assert s[2] == "0.98"
assert s[3] == '"hello world"'
assert s[4] == "[0.1,0.2]"
assert (
s[5]
== '{"null_field":null,"order":{"address":{"city":"Anytown","street":"123 Main St"},"items":["book","pen","computer"],"total":15}}'
)
assert s[6] == "null"