@@ -72,14 +72,25 @@ class JSONArray(arrays.ArrowExtensionArray):
72
72
73
73
_dtype = JSONDtype ()
74
74
75
def __init__(self, values) -> None:
    """Create a JSONArray backed by pyarrow data.

    The parent initializer is run first, then the dtype is replaced with a
    ``JSONDtype`` instance and the pyarrow data is stored as a chunked array.

    Args:
        values: A ``pa.Array`` (wrapped into a single-chunk chunked array)
            or a ``pa.ChunkedArray`` (stored as given).

    Raises:
        NotImplementedError: If ``values`` is neither of the two pyarrow
            types above, or if the instance exposes neither a ``_data``
            nor a ``_pa_array`` attribute to store the data in.
    """
    super().__init__(values)
    self._dtype = JSONDtype()

    # Normalise the input so that a chunked array is always stored.
    if isinstance(values, pa.ChunkedArray):
        chunked = values
    elif isinstance(values, pa.Array):
        chunked = pa.chunked_array([values])
    else:
        raise NotImplementedError(
            f"Unsupported type '{type(values)} ' for JSONArray"
        )

    # Ensures compatibility with pandas version 1.5.3: the attribute that
    # holds the pyarrow data is probed by name, `_data` first.
    for slot in ("_data", "_pa_array"):
        if hasattr(self, slot):
            setattr(self, slot, chunked)
            break
    else:
        raise NotImplementedError(f"Unsupported pandas version: {pd.__version__} ")
83
94
84
95
@classmethod
85
96
def _box_pa (
@@ -111,7 +122,7 @@ def _box_pa_scalar(cls, value) -> pa.Scalar:
111
122
def _box_pa_array (cls , value , copy : bool = False ) -> pa .Array | pa .ChunkedArray :
112
123
"""Box value into a pyarrow Array or ChunkedArray."""
113
124
if isinstance (value , cls ):
114
- pa_array = value ._pa_array
125
+ pa_array = value .pa_data
115
126
else :
116
127
value = [JSONArray ._serialize_json (x ) for x in value ]
117
128
pa_array = pa .array (value , type = cls ._dtype .pyarrow_dtype , from_pandas = True )
@@ -147,11 +158,22 @@ def dtype(self) -> JSONDtype:
147
158
"""An instance of JSONDtype"""
148
159
return self ._dtype
149
160
161
@property
def pa_data(self):
    """Return the pyarrow data stored on this array.

    The storage attribute is probed by name to stay compatible with
    pandas version 1.5.3: ``_data`` is checked first, then ``_pa_array``.

    Raises:
        NotImplementedError: If the instance has neither attribute.
    """
    for slot in ("_data", "_pa_array"):
        if hasattr(self, slot):
            return getattr(self, slot)
    raise NotImplementedError(f"Unsupported pandas version: {pd.__version__} ")
171
+
150
172
def _cmp_method (self , other , op ):
151
173
if op .__name__ == "eq" :
152
- result = pyarrow .compute .equal (self ._pa_array , self ._box_pa (other ))
174
+ result = pyarrow .compute .equal (self .pa_data , self ._box_pa (other ))
153
175
elif op .__name__ == "ne" :
154
- result = pyarrow .compute .not_equal (self ._pa_array , self ._box_pa (other ))
176
+ result = pyarrow .compute .not_equal (self .pa_data , self ._box_pa (other ))
155
177
else :
156
178
# Comparison is not a meaningful one. We don't want to support sorting by JSON columns.
157
179
raise TypeError (f"{ op .__name__ } not supported for JSONArray" )
@@ -169,7 +191,7 @@ def __getitem__(self, item):
169
191
else :
170
192
# `check_array_indexer` should verify that the assertion hold true.
171
193
assert item .dtype .kind == "b"
172
- return type (self )(self ._pa_array .filter (item ))
194
+ return type (self )(self .pa_data .filter (item ))
173
195
elif isinstance (item , tuple ):
174
196
item = indexers .unpack_tuple_and_ellipses (item )
175
197
@@ -181,7 +203,7 @@ def __getitem__(self, item):
181
203
r"(`None`) and integer or boolean arrays are valid indices"
182
204
)
183
205
184
- value = self ._pa_array [item ]
206
+ value = self .pa_data [item ]
185
207
if isinstance (value , pa .ChunkedArray ):
186
208
return type (self )(value )
187
209
else :
@@ -193,7 +215,7 @@ def __getitem__(self, item):
193
215
194
216
def __iter__ (self ):
195
217
"""Iterate over elements of the array."""
196
- for value in self ._pa_array :
218
+ for value in self .pa_data :
197
219
val = JSONArray ._deserialize_json (value .as_py ())
198
220
if val is None :
199
221
yield self ._dtype .na_value
0 commit comments