41
41
pa_version_under13p0 ,
42
42
pa_version_under14p0 ,
43
43
)
44
+ import pandas .util ._test_decorators as td
44
45
45
46
from pandas .core .dtypes .dtypes import (
46
47
ArrowDtype ,
@@ -266,6 +267,19 @@ def data_for_twos(data):
266
267
267
268
268
269
class TestArrowArray (base .ExtensionTests ):
270
+ def test_compare_scalar (self , data , comparison_op ):
271
+ ser = pd .Series (data )
272
+ self ._compare_other (ser , data , comparison_op , data [0 ])
273
+
274
+ @pytest .mark .parametrize ("na_action" , [None , "ignore" ])
275
+ def test_map (self , data_missing , na_action ):
276
+ if data_missing .dtype .kind in "mM" :
277
+ result = data_missing .map (lambda x : x , na_action = na_action )
278
+ expected = data_missing .to_numpy (dtype = object )
279
+ tm .assert_numpy_array_equal (result , expected )
280
+ else :
281
+ super ().test_map (data_missing , na_action )
282
+
269
283
def test_astype_str (self , data , request ):
270
284
pa_dtype = data .dtype .pyarrow_dtype
271
285
if pa .types .is_binary (pa_dtype ):
@@ -274,8 +288,35 @@ def test_astype_str(self, data, request):
274
288
reason = f"For { pa_dtype } .astype(str) decodes." ,
275
289
)
276
290
)
291
+ elif (
292
+ pa .types .is_timestamp (pa_dtype ) and pa_dtype .tz is None
293
+ ) or pa .types .is_duration (pa_dtype ):
294
+ request .applymarker (
295
+ pytest .mark .xfail (
296
+ reason = "pd.Timestamp/pd.Timedelta repr different from numpy repr" ,
297
+ )
298
+ )
277
299
super ().test_astype_str (data )
278
300
301
+ @pytest .mark .parametrize (
302
+ "nullable_string_dtype" ,
303
+ [
304
+ "string[python]" ,
305
+ pytest .param ("string[pyarrow]" , marks = td .skip_if_no ("pyarrow" )),
306
+ ],
307
+ )
308
+ def test_astype_string (self , data , nullable_string_dtype , request ):
309
+ pa_dtype = data .dtype .pyarrow_dtype
310
+ if (
311
+ pa .types .is_timestamp (pa_dtype ) and pa_dtype .tz is None
312
+ ) or pa .types .is_duration (pa_dtype ):
313
+ request .applymarker (
314
+ pytest .mark .xfail (
315
+ reason = "pd.Timestamp/pd.Timedelta repr different from numpy repr" ,
316
+ )
317
+ )
318
+ super ().test_astype_string (data , nullable_string_dtype )
319
+
279
320
def test_from_dtype (self , data , request ):
280
321
pa_dtype = data .dtype .pyarrow_dtype
281
322
if pa .types .is_string (pa_dtype ) or pa .types .is_decimal (pa_dtype ):
@@ -1511,11 +1552,9 @@ def test_to_numpy_with_defaults(data):
1511
1552
result = data .to_numpy ()
1512
1553
1513
1554
pa_type = data ._pa_array .type
1514
- if (
1515
- pa .types .is_duration (pa_type )
1516
- or pa .types .is_timestamp (pa_type )
1517
- or pa .types .is_date (pa_type )
1518
- ):
1555
+ if pa .types .is_duration (pa_type ) or pa .types .is_timestamp (pa_type ):
1556
+ pytest .skip ("Tested in test_to_numpy_temporal" )
1557
+ elif pa .types .is_date (pa_type ):
1519
1558
expected = np .array (list (data ))
1520
1559
else :
1521
1560
expected = np .array (data ._pa_array )
@@ -2937,26 +2976,28 @@ def test_groupby_series_size_returns_pa_int(data):
2937
2976
2938
2977
2939
2978
@pytest .mark .parametrize (
2940
- "pa_type" , tm .DATETIME_PYARROW_DTYPES + tm .TIMEDELTA_PYARROW_DTYPES
2979
+ "pa_type" , tm .DATETIME_PYARROW_DTYPES + tm .TIMEDELTA_PYARROW_DTYPES , ids = repr
2941
2980
)
2942
- def test_to_numpy_temporal (pa_type ):
2981
+ @pytest .mark .parametrize ("dtype" , [None , object ])
2982
+ def test_to_numpy_temporal (pa_type , dtype ):
2943
2983
# GH 53326
2984
+ # GH 55997: Return datetime64/timedelta64 types with NaT if possible
2944
2985
arr = ArrowExtensionArray (pa .array ([1 , None ], type = pa_type ))
2945
- result = arr .to_numpy ()
2986
+ result = arr .to_numpy (dtype = dtype )
2946
2987
if pa .types .is_duration (pa_type ):
2947
- expected = [
2948
- pd .Timedelta (1 , unit = pa_type .unit ).as_unit (pa_type .unit ),
2949
- pd .NA ,
2950
- ]
2951
- assert isinstance (result [0 ], pd .Timedelta )
2988
+ value = pd .Timedelta (1 , unit = pa_type .unit ).as_unit (pa_type .unit )
2952
2989
else :
2953
- expected = [
2954
- pd .Timestamp (1 , unit = pa_type .unit , tz = pa_type .tz ).as_unit (pa_type .unit ),
2955
- pd .NA ,
2956
- ]
2957
- assert isinstance (result [0 ], pd .Timestamp )
2958
- expected = np .array (expected , dtype = object )
2959
- assert result [0 ].unit == expected [0 ].unit
2990
+ value = pd .Timestamp (1 , unit = pa_type .unit , tz = pa_type .tz ).as_unit (pa_type .unit )
2991
+
2992
+ if dtype == object or (pa .types .is_timestamp (pa_type ) and pa_type .tz is not None ):
2993
+ na = pd .NA
2994
+ expected = np .array ([value , na ], dtype = object )
2995
+ assert result [0 ].unit == value .unit
2996
+ else :
2997
+ na = pa_type .to_pandas_dtype ().type ("nat" , pa_type .unit )
2998
+ value = value .to_numpy ()
2999
+ expected = np .array ([value , na ])
3000
+ assert np .datetime_data (result [0 ])[0 ] == pa_type .unit
2960
3001
tm .assert_numpy_array_equal (result , expected )
2961
3002
2962
3003
0 commit comments