24
24
import pyarrow as pa
25
25
import pyarrow .compute
26
26
27
- ARROW_CMP_FUNCS = {
28
- "eq" : pyarrow .compute .equal ,
29
- "ne" : pyarrow .compute .not_equal ,
30
- "lt" : pyarrow .compute .less ,
31
- "gt" : pyarrow .compute .greater ,
32
- "le" : pyarrow .compute .less_equal ,
33
- "ge" : pyarrow .compute .greater_equal ,
34
- }
35
-
36
27
37
28
@pd .api .extensions .register_extension_dtype
38
29
class JSONDtype (pd .api .extensions .ExtensionDtype ):
@@ -68,11 +59,6 @@ def construct_array_type(cls):
68
59
"""Return the array type associated with this dtype."""
69
60
return JSONArray
70
61
71
- # @staticmethod
72
- # def __from_arrow__(array: typing.Union[pa.Array, pa.ChunkedArray]) -> JSONArray:
73
- # """Convert to JSONArray from an Arrow array."""
74
- # return JSONArray(array)
75
-
76
62
77
63
class JSONArray (arrays .ArrowExtensionArray ):
78
64
"""Extension array that handles BigQuery JSON data, leveraging a string-based
@@ -95,26 +81,26 @@ def _box_pa(
95
81
cls , value , pa_type : pa .DataType | None = None
96
82
) -> pa .Array | pa .ChunkedArray | pa .Scalar :
97
83
"""Box value into a pyarrow Array, ChunkedArray or Scalar."""
84
+ if pa_type is not None and pa_type != pa .string ():
85
+ raise ValueError (f"Unsupported type '{ pa_type } ' for JSONArray" )
98
86
99
87
if isinstance (value , pa .Scalar ) or not (
100
88
common .is_list_like (value ) and not common .is_dict_like (value )
101
89
):
102
- return cls ._box_pa_scalar (value , pa_type )
103
- return cls ._box_pa_array (value , pa_type )
90
+ return cls ._box_pa_scalar (value )
91
+ return cls ._box_pa_array (value )
104
92
105
93
@classmethod
def _box_pa_scalar(cls, value) -> pa.Scalar:
    """Box value into a pyarrow Scalar.

    Args:
        value: A ``pa.Scalar``, a missing value (anything for which
            ``pd.isna`` is true), or an object to be serialized with
            ``JSONArray._serialize_json``.

    Returns:
        ``value`` itself when it is already a ``pa.Scalar``; a null scalar
        of type ``pa.string()`` when ``value`` is missing; otherwise a
        ``pa.string()`` scalar holding the serialized form of ``value``.
    """
    # The three cases are mutually exclusive. Previously the pa.Scalar
    # branch was followed by an independent ``if pd.isna(...)/else`` that
    # always overwrote its result, so an existing scalar was never returned.
    if isinstance(value, pa.Scalar):
        return value

    if pd.isna(value):
        return pa.scalar(None, type=pa.string())

    serialized = JSONArray._serialize_json(value)
    return pa.scalar(serialized, type=pa.string(), from_pandas=True)
119
105
120
106
@classmethod
@@ -131,7 +117,8 @@ def _box_pa_array(
131
117
value = [JSONArray ._serialize_json (x ) for x in value ]
132
118
pa_array = pa .array (value , type = pa_type , from_pandas = True )
133
119
except (pa .ArrowInvalid , pa .ArrowTypeError ):
134
- # GH50430: let pyarrow infer type, then cast
120
+ # https://github.com/pandas-dev/pandas/pull/50430:
121
+ # let pyarrow infer type, then cast
135
122
pa_array = pa .array (value , from_pandas = True )
136
123
137
124
if pa_type is not None and pa_array .type != pa_type :
@@ -181,8 +168,12 @@ def dtype(self) -> JSONDtype:
181
168
return self ._dtype
182
169
183
170
def _cmp_method (self , other , op ):
184
- pc_func = ARROW_CMP_FUNCS [op .__name__ ]
185
- result = pc_func (self ._pa_array , self ._box_pa (other ))
171
+ if op .__name__ == "eq" :
172
+ result = pyarrow .compute .equal (self ._pa_array , self ._box_pa (other ))
173
+ elif op .__name__ == "ne" :
174
+ result = pyarrow .compute .not_equal (self ._pa_array , self ._box_pa (other ))
175
+ else :
176
+ raise NotImplementedError (f"{ op .__name__ } not implemented for JSONArray" )
186
177
return arrays .ArrowExtensionArray (result )
187
178
188
179
def __getitem__ (self , item ):
0 commit comments