Skip to content

Commit 913d0bc

Browse files
committed
add pyarrow_dtypes
1 parent 7800242 commit 913d0bc

File tree

1 file changed

+12
-16
lines changed

1 file changed

+12
-16
lines changed

db_dtypes/json.py

+12 -16
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,11 @@ def type(self) -> type[str]:
4646
"""
4747
return str
4848

49+
@property
50+
def pyarrow_dtype(self):
51+
"""Return the pyarrow data type used for storing data in the pyarrow array."""
52+
return pa.string()
53+
4954
@property
5055
def _is_numeric(self) -> bool:
5156
return False
@@ -81,7 +86,7 @@ def _box_pa(
8186
cls, value, pa_type: pa.DataType | None = None
8287
) -> pa.Array | pa.ChunkedArray | pa.Scalar:
8388
"""Box value into a pyarrow Array, ChunkedArray or Scalar."""
84-
assert pa_type is None or pa_type == pa.string()
89+
assert pa_type is None or pa_type == cls._dtype.pyarrow_dtype
8590

8691
if isinstance(value, pa.Scalar) or not (
8792
common.is_list_like(value) and not common.is_dict_like(value)
@@ -93,10 +98,12 @@ def _box_pa(
9398
def _box_pa_scalar(cls, value) -> pa.Scalar:
9499
"""Box value into a pyarrow Scalar."""
95100
if pd.isna(value):
96-
pa_scalar = pa.scalar(None, type=pa.string())
101+
pa_scalar = pa.scalar(None, type=cls._dtype.pyarrow_dtype)
97102
else:
98103
value = JSONArray._serialize_json(value)
99-
pa_scalar = pa.scalar(value, type=pa.string(), from_pandas=True)
104+
pa_scalar = pa.scalar(
105+
value, type=cls._dtype.pyarrow_dtype, from_pandas=True
106+
)
100107

101108
return pa_scalar
102109

@@ -107,7 +114,7 @@ def _box_pa_array(cls, value, copy: bool = False) -> pa.Array | pa.ChunkedArray:
107114
pa_array = value._pa_array
108115
else:
109116
value = [JSONArray._serialize_json(x) for x in value]
110-
pa_array = pa.array(value, type=pa.string(), from_pandas=True)
117+
pa_array = pa.array(value, type=cls._dtype.pyarrow_dtype, from_pandas=True)
111118
return pa_array
112119

113120
@classmethod
@@ -117,17 +124,6 @@ def _from_sequence(cls, scalars, *, dtype=None, copy=False):
117124
arr = cls(pa_array)
118125
return arr
119126

120-
@classmethod
121-
def _concat_same_type(cls, to_concat) -> JSONArray:
122-
"""Concatenate multiple JSONArray."""
123-
chunks = [
124-
pa_array_chunks
125-
for item in to_concat
126-
for pa_array_chunks in item._pa_array.iterchunks()
127-
]
128-
arr = pa.chunked_array(chunks, type=pa.string())
129-
return cls(arr)
130-
131127
@staticmethod
132128
def _serialize_json(value):
133129
"""A static method that converts a JSON value into a string representation."""
@@ -167,7 +163,7 @@ def __getitem__(self, item):
167163

168164
if isinstance(item, np.ndarray):
169165
if not len(item):
170-
return type(self)(pa.chunked_array([], type=pa.string()))
166+
return type(self)(pa.chunked_array([], type=self.dtype.pyarrow_dtype))
171167
elif item.dtype.kind in "iu":
172168
return self.take(item)
173169
else:

0 commit comments

Comments
 (0)