Skip to content

Commit 4b84e4a

Browse files
authored
fix: support dbjson type on pandas version 1.5 (#295)
* fix: support JSONDtype on pandas version 1.5 * sets constraints-3.9 for pandas 1.5.3 * fix test cov * fix format * nit * fix lint
1 parent 36109b1 commit 4b84e4a

File tree

3 files changed

+37
-27
lines changed

3 files changed

+37
-27
lines changed

db_dtypes/json.py

+32-10
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,25 @@ class JSONArray(arrays.ArrowExtensionArray):
7272

7373
_dtype = JSONDtype()
7474

75-
def __init__(self, values, dtype=None, copy=False) -> None:
75+
def __init__(self, values) -> None:
76+
super().__init__(values)
7677
self._dtype = JSONDtype()
7778
if isinstance(values, pa.Array):
78-
self._pa_array = pa.chunked_array([values])
79+
pa_data = pa.chunked_array([values])
7980
elif isinstance(values, pa.ChunkedArray):
80-
self._pa_array = values
81+
pa_data = values
8182
else:
82-
raise ValueError(f"Unsupported type '{type(values)}' for JSONArray")
83+
raise NotImplementedError(
84+
f"Unsupported type '{type(values)}' for JSONArray"
85+
)
86+
87+
# Ensures compatibility with pandas version 1.5.3
88+
if hasattr(self, "_data"):
89+
self._data = pa_data
90+
elif hasattr(self, "_pa_array"):
91+
self._pa_array = pa_data
92+
else:
93+
raise NotImplementedError(f"Unsupported pandas version: {pd.__version__}")
8394

8495
@classmethod
8596
def _box_pa(
@@ -111,7 +122,7 @@ def _box_pa_scalar(cls, value) -> pa.Scalar:
111122
def _box_pa_array(cls, value, copy: bool = False) -> pa.Array | pa.ChunkedArray:
112123
"""Box value into a pyarrow Array or ChunkedArray."""
113124
if isinstance(value, cls):
114-
pa_array = value._pa_array
125+
pa_array = value.pa_data
115126
else:
116127
value = [JSONArray._serialize_json(x) for x in value]
117128
pa_array = pa.array(value, type=cls._dtype.pyarrow_dtype, from_pandas=True)
@@ -147,11 +158,22 @@ def dtype(self) -> JSONDtype:
147158
"""An instance of JSONDtype"""
148159
return self._dtype
149160

161+
@property
162+
def pa_data(self):
163+
"""The stored pyarrow data, read from ``_data`` or ``_pa_array`` depending on the pandas version."""
164+
# Ensures compatibility with pandas version 1.5.3
165+
if hasattr(self, "_data"):
166+
return self._data
167+
elif hasattr(self, "_pa_array"):
168+
return self._pa_array
169+
else:
170+
raise NotImplementedError(f"Unsupported pandas version: {pd.__version__}")
171+
150172
def _cmp_method(self, other, op):
151173
if op.__name__ == "eq":
152-
result = pyarrow.compute.equal(self._pa_array, self._box_pa(other))
174+
result = pyarrow.compute.equal(self.pa_data, self._box_pa(other))
153175
elif op.__name__ == "ne":
154-
result = pyarrow.compute.not_equal(self._pa_array, self._box_pa(other))
176+
result = pyarrow.compute.not_equal(self.pa_data, self._box_pa(other))
155177
else:
156178
# Comparison is not a meaningful one. We don't want to support sorting by JSON columns.
157179
raise TypeError(f"{op.__name__} not supported for JSONArray")
@@ -169,7 +191,7 @@ def __getitem__(self, item):
169191
else:
170192
# `check_array_indexer` should verify that the assertion holds true.
171193
assert item.dtype.kind == "b"
172-
return type(self)(self._pa_array.filter(item))
194+
return type(self)(self.pa_data.filter(item))
173195
elif isinstance(item, tuple):
174196
item = indexers.unpack_tuple_and_ellipses(item)
175197

@@ -181,7 +203,7 @@ def __getitem__(self, item):
181203
r"(`None`) and integer or boolean arrays are valid indices"
182204
)
183205

184-
value = self._pa_array[item]
206+
value = self.pa_data[item]
185207
if isinstance(value, pa.ChunkedArray):
186208
return type(self)(value)
187209
else:
@@ -193,7 +215,7 @@ def __getitem__(self, item):
193215

194216
def __iter__(self):
195217
"""Iterate over elements of the array."""
196-
for value in self._pa_array:
218+
for value in self.pa_data:
197219
val = JSONArray._deserialize_json(value.as_py())
198220
if val is None:
199221
yield self._dtype.na_value

testing/constraints-3.9.txt

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
# Make sure we test with pandas 1.3.0. The Python version isn't that relevant.
2-
pandas==1.3.0
3-
numpy<2.0.0
1+
# Make sure we test with pandas 1.5.3. The Python version isn't that relevant.
2+
pandas==1.5.3
3+
numpy==1.24.0

tests/unit/test_json.py

+2-14
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
# limitations under the License.
1414

1515

16-
import json
17-
1816
import pandas as pd
1917
import pytest
2018

@@ -78,18 +76,8 @@ def test_getitems_when_iter_with_null():
7876
assert pd.isna(result)
7977

8078

81-
def test_to_numpy():
82-
s = pd.Series(db_dtypes.JSONArray._from_sequence(JSON_DATA.values()))
83-
data = s.to_numpy()
84-
for id, key in enumerate(JSON_DATA.keys()):
85-
if key == "null":
86-
assert pd.isna(data[id])
87-
else:
88-
assert data[id] == json.dumps(JSON_DATA[key], sort_keys=True)
89-
90-
9179
def test_deterministic_json_serialization():
9280
x = {"a": 0, "b": 1}
9381
y = {"b": 1, "a": 0}
94-
data = db_dtypes.JSONArray._from_sequence([x])
95-
assert y in data
82+
data = db_dtypes.JSONArray._from_sequence([y])
83+
assert data[0] == x

0 commit comments

Comments
 (0)