Commit 48f23a1

add docstring and remove unused functions

1 parent 56d367f

2 files changed: +22 -118 lines

db_dtypes/json.py

Lines changed: 22 additions & 95 deletions

@@ -30,16 +30,18 @@
 
 @pd.api.extensions.register_extension_dtype
 class JSONDtype(pd.api.extensions.ExtensionDtype):
-    """Extension dtype for JSON data."""
+    """Extension dtype for BigQuery JSON data."""
 
     name = "dbjson"
 
     @property
     def na_value(self) -> pd.NA:
+        """Default NA value to use for this type."""
         return pd.NA
 
     @property
     def type(self) -> type[str]:
+        """Return the scalar type for the array, e.g. int."""
         return dict
 
     @property
@@ -62,7 +64,9 @@ def __from_arrow__(array: typing.Union[pa.Array, pa.ChunkedArray]) -> JSONArray:
 
 
 class JSONArray(ArrowExtensionArray):
-    """Extension array containing JSON data."""
+    """Extension array that handles BigQuery JSON data, leveraging a string-based
+    pyarrow array for storage. It enables seamless conversion to JSON objects when
+    accessing individual elements."""
 
     _dtype = JSONDtype()
 
@@ -88,18 +92,7 @@ def __init__(self, values, dtype=None, copy=False) -> None:
     def _box_pa(
         cls, value, pa_type: pa.DataType | None = None
     ) -> pa.Array | pa.ChunkedArray | pa.Scalar:
-        """
-        Box value into a pyarrow Array, ChunkedArray or Scalar.
-
-        Parameters
-        ----------
-        value : any
-        pa_type : pa.DataType | None
-
-        Returns
-        -------
-        pa.Array or pa.ChunkedArray or pa.Scalar
-        """
+        """Box value into a pyarrow Array, ChunkedArray or Scalar."""
         if isinstance(value, pa.Scalar) or not (
             is_list_like(value) and not is_dict_like(value)
         ):
@@ -108,18 +101,7 @@ def _box_pa(
 
     @classmethod
     def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
-        """
-        Box value into a pyarrow Scalar.
-
-        Parameters
-        ----------
-        value : any
-        pa_type : pa.DataType | None
-
-        Returns
-        -------
-        pa.Scalar
-        """
+        """Box value into a pyarrow Scalar."""
         value = JSONArray._seralizate_json(value)
         pa_scalar = super()._box_pa_scalar(value, pa_type)
         if pa.types.is_string(pa_scalar.type) and pa_type is None:
@@ -130,18 +112,7 @@ def _box_pa_scalar(cls, value, pa_type: pa.DataType | None = None) -> pa.Scalar:
     def _box_pa_array(
         cls, value, pa_type: pa.DataType | None = None, copy: bool = False
     ) -> pa.Array | pa.ChunkedArray:
-        """
-        Box value into a pyarrow Array or ChunkedArray.
-
-        Parameters
-        ----------
-        value : Sequence
-        pa_type : pa.DataType | None
-
-        Returns
-        -------
-        pa.Array or pa.ChunkedArray
-        """
+        """Box value into a pyarrow Array or ChunkedArray."""
         if (
             not isinstance(value, cls)
             and not isinstance(value, (pa.Array, pa.ChunkedArray))
@@ -155,18 +126,7 @@ def _box_pa_array(
 
     @classmethod
     def _from_sequence(cls, scalars, *, dtype=None, copy=False):
-        # TODO: check _from_arrow APIs etc.
-        # from pandas.core.arrays.masked import BaseMaskedArray
-
-        # if isinstance(scalars, BaseMaskedArray):
-        #     # avoid costly conversion to object dtype in ensure_string_array and
-        #     # numerical issues with Float32Dtype
-        #     na_values = scalars._mask
-        #     result = scalars._data
-        #     # result = lib.ensure_string_array(result, copy=copy, convert_na_value=False)
-        #     return cls(pa.array(result, mask=na_values, type=pa.large_string()))
-        # elif isinstance(scalars, (pa.Array, pa.ChunkedArray)):
-        #     return cls(pc.cast(scalars, pa.large_string()))
+        """Construct a new ExtensionArray from a sequence of scalars."""
         result = []
         for scalar in scalars:
             result.append(JSONArray._seralizate_json(scalar))
@@ -176,10 +136,12 @@ def _from_sequence(cls, scalars, *, dtype=None, copy=False):
     def _from_sequence_of_strings(
         cls, strings, *, dtype: ExtensionDtype, copy: bool = False
     ) -> JSONArray:
+        """Construct a new ExtensionArray from a sequence of strings."""
         return cls._from_sequence(strings, dtype=dtype, copy=copy)
 
     @staticmethod
     def _seralizate_json(value):
+        """A static method that converts a JSON value into a string representation."""
         if isinstance(value, str) or pd.isna(value):
             return value
         else:
@@ -189,6 +151,7 @@ def _seralizate_json(value):
 
     @staticmethod
     def _deserialize_json(value):
+        """A static method that converts a JSON string back into its original value."""
         if not pd.isna(value):
             return json.loads(value)
         else:
@@ -200,40 +163,24 @@ def dtype(self) -> JSONDtype:
         return self._dtype
 
     def __contains__(self, key) -> bool:
+        """Return for `item in self`."""
         return super().__contains__(JSONArray._seralizate_json(key))
 
     def insert(self, loc: int, item) -> JSONArray:
+        """
+        Make new ExtensionArray inserting new item at location. Follows Python
+        list.append semantics for negative values.
+        """
         val = JSONArray._seralizate_json(item)
         return super().insert(loc, val)
 
     @classmethod
     def _from_factorized(cls, values, original):
+        """Reconstruct an ExtensionArray after factorization."""
         return cls._from_sequence(values, dtype=original.dtype)
 
     def __getitem__(self, item):
-        """Select a subset of self.
-
-        Parameters
-        ----------
-        item : int, slice, or ndarray
-            * int: The position in 'self' to get.
-            * slice: A slice object, where 'start', 'stop', and 'step' are
-              integers or None
-            * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'
-
-        Returns
-        -------
-        item : scalar or ExtensionArray
-
-        Notes
-        -----
-        For scalar ``item``, return a scalar value suitable for the array's
-        type. This should be an instance of ``self.dtype.type``.
-        For slice ``key``, return an instance of ``ExtensionArray``, even
-        if the slice is length 0 or 1.
-        For a boolean mask, return an instance of ``ExtensionArray``, filtered
-        to the values where ``item`` is True.
-        """
+        """Select a subset of self."""
         item = check_array_indexer(self, item)
 
         if isinstance(item, np.ndarray):
@@ -283,37 +230,17 @@ def __getitem__(self, item):
         return scalar
 
     def __iter__(self):
-        """
-        Iterate over elements of the array.
-        """
+        """Iterate over elements of the array."""
        for value in self._pa_array:
             val = JSONArray._deserialize_json(value.as_py())
             if val is None:
                 yield self._dtype.na_value
             else:
                 yield val
 
-    @classmethod
-    def _result_converter(cls, values, na=None):
-        return pd.BooleanDtype().__from_arrow__(values)
-
     @classmethod
     def _concat_same_type(cls, to_concat) -> JSONArray:
-        """
-        Concatenate multiple JSONArray.
-
-        Parameters
-        ----------
-        to_concat : sequence of JSONArray
-
-        Returns
-        -------
-        JSONArray
-        """
+        """Concatenate multiple JSONArray."""
         chunks = [array for ea in to_concat for array in ea._pa_array.iterchunks()]
         arr = pa.chunked_array(chunks, type=pa.large_string())
         return cls(arr)
-
-    def _pad_or_backfill(self, *, method, limit=None, copy=True):
-        # GH#56616 - test EA method without limit_area argument
-        return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
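
Aside (not part of the commit): a minimal sketch of the string-based storage scheme the new JSONArray docstring describes, using only json and pyarrow directly. It assumes dict values are stored as JSON strings in a large_string array and decoded back on access, roughly mirroring what _seralizate_json / _deserialize_json appear to do; the names below are illustrative only.

# Illustrative sketch of the storage scheme, not the library's API.
# Assumption: dicts are serialized with json.dumps and stored as large_string.
import json

import pandas as pd
import pyarrow as pa

records = [{"a": 1}, {"b": [2, 3]}, None]

# Serialize dicts to JSON strings before handing them to pyarrow for storage.
serialized = [json.dumps(r) if r is not None else None for r in records]
storage = pa.chunked_array([serialized], type=pa.large_string())

# Deserialize back to Python objects on element access, yielding pd.NA for nulls.
roundtripped = [
    json.loads(s) if s is not None else pd.NA for s in storage.to_pylist()
]
print(roundtripped)  # [{'a': 1}, {'b': [2, 3]}, <NA>]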

tests/compliance/json/test_json_compliance.py

Lines changed: 0 additions & 23 deletions

@@ -26,29 +26,6 @@
 
 
 class TestJSONArray(base.ExtensionTests):
-    @pytest.mark.parametrize(
-        "limit_area, input_ilocs, expected_ilocs",
-        [
-            ("outside", [1, 0, 0, 0, 1], [1, 0, 0, 0, 1]),
-            ("outside", [1, 0, 1, 0, 1], [1, 0, 1, 0, 1]),
-            ("outside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 1]),
-            ("outside", [0, 1, 0, 1, 0], [0, 1, 0, 1, 1]),
-            ("inside", [1, 0, 0, 0, 1], [1, 1, 1, 1, 1]),
-            ("inside", [1, 0, 1, 0, 1], [1, 1, 1, 1, 1]),
-            ("inside", [0, 1, 1, 1, 0], [0, 1, 1, 1, 0]),
-            ("inside", [0, 1, 0, 1, 0], [0, 1, 1, 1, 0]),
-        ],
-    )
-    def test_ffill_limit_area(
-        self, data_missing, limit_area, input_ilocs, expected_ilocs
-    ):
-        # GH#56616
-        msg = "JSONArray does not implement limit_area"
-        with pytest.raises(NotImplementedError, match=msg):
-            super().test_ffill_limit_area(
-                data_missing, limit_area, input_ilocs, expected_ilocs
-            )
-
     @pytest.mark.xfail(reason="Unhashable")
     def test_value_counts_with_normalize(self, data):
         super().test_value_counts_with_normalize(data)
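
Aside (not part of the commit): a hedged end-user sketch of the dtype these compliance tests exercise. It assumes the in-development package registers the "dbjson" dtype on import and that construction and element access behave like other registered pandas extension dtypes; exact behavior at this commit may differ.

# Illustrative only; assumes a db_dtypes build that includes JSONDtype/JSONArray.
import db_dtypes  # noqa: F401  -- importing registers the "dbjson" extension dtype
import pandas as pd

s = pd.Series([{"a": 1}, None, {"b": [2, 3]}], dtype="dbjson")
print(s.dtype)  # dbjson
print(s[0])     # {'a': 1} -- element access deserializes the stored JSON string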
