65
65
from pandas .core .arrays .arrow .extension_types import ArrowPeriodType
66
66
67
67
68
def _require_timezone_database(request):
    """Flag the requesting test as an expected failure on Windows CI.

    When both ``is_platform_windows()`` and ``is_ci_environment()`` return
    true, an ``xfail`` marker expecting ``pa.ArrowInvalid`` is attached to
    ``request.node``. In every other environment this is a no-op.

    Parameters
    ----------
    request
        Object exposing ``node.add_marker``; callers in this file pass the
        ``request`` argument of the test function.
    """
    # Guard clause: only the Windows + CI combination gets the marker.
    if not (is_platform_windows() and is_ci_environment()):
        return

    xfail_reason = (
        "TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
        "on CI to path to the tzdata for pyarrow."
    )
    xfail_marker = pytest.mark.xfail(raises=pa.ArrowInvalid, reason=xfail_reason)
    request.node.add_marker(xfail_marker)
78
+
79
+
68
80
@pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str)
def dtype(request):
    """Return an ``ArrowDtype`` wrapping the current fixture parameter.

    The fixture is parametrized over ``tm.ALL_PYARROW_DTYPES``; each
    parameter is passed to ``ArrowDtype`` as ``pyarrow_dtype``.
    """
    pyarrow_dtype = request.param
    return ArrowDtype(pyarrow_dtype=pyarrow_dtype)
@@ -314,16 +326,8 @@ def test_from_sequence_of_strings_pa_array(self, data, request):
314
326
)
315
327
)
316
328
elif pa .types .is_timestamp (pa_dtype ) and pa_dtype .tz is not None :
317
- if is_platform_windows () and is_ci_environment ():
318
- request .node .add_marker (
319
- pytest .mark .xfail (
320
- raises = pa .ArrowInvalid ,
321
- reason = (
322
- "TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
323
- "on CI to path to the tzdata for pyarrow."
324
- ),
325
- )
326
- )
329
+ _require_timezone_database (request )
330
+
327
331
pa_array = data ._pa_array .cast (pa .string ())
328
332
result = type (data )._from_sequence_of_strings (pa_array , dtype = data .dtype )
329
333
tm .assert_extension_array_equal (result , data )
@@ -795,20 +799,6 @@ def test_value_counts_returns_pyarrow_int64(self, data):
795
799
result = data .value_counts ()
796
800
assert result .dtype == ArrowDtype (pa .int64 ())
797
801
798
- def test_value_counts_with_normalize (self , data , request ):
799
- data = data [:10 ].unique ()
800
- values = np .array (data [~ data .isna ()])
801
- ser = pd .Series (data , dtype = data .dtype )
802
-
803
- result = ser .value_counts (normalize = True ).sort_index ()
804
-
805
- expected = pd .Series (
806
- [1 / len (values )] * len (values ), index = result .index , name = "proportion"
807
- )
808
- expected = expected .astype ("double[pyarrow]" )
809
-
810
- self .assert_series_equal (result , expected )
811
-
812
802
def test_argmin_argmax (
813
803
self , data_for_sorting , data_missing_for_sorting , na_value , request
814
804
):
@@ -865,10 +855,6 @@ def test_combine_add(self, data_repeated, request):
865
855
else :
866
856
super ().test_combine_add (data_repeated )
867
857
868
- def test_basic_equals (self , data ):
869
- # https://github.com/pandas-dev/pandas/issues/34660
870
- assert pd .Series (data ).equals (pd .Series (data ))
871
-
872
858
873
859
class TestBaseArithmeticOps (base .BaseArithmeticOpsTests ):
874
860
divmod_exc = NotImplementedError
@@ -2563,33 +2549,17 @@ def test_dt_isocalendar():
2563
2549
)
2564
2550
def test_dt_day_month_name (method , exp , request ):
2565
2551
# GH 52388
2566
- if is_platform_windows () and is_ci_environment ():
2567
- request .node .add_marker (
2568
- pytest .mark .xfail (
2569
- raises = pa .ArrowInvalid ,
2570
- reason = (
2571
- "TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
2572
- "on CI to path to the tzdata for pyarrow."
2573
- ),
2574
- )
2575
- )
2552
+ _require_timezone_database (request )
2553
+
2576
2554
ser = pd .Series ([datetime (2023 , 1 , 1 ), None ], dtype = ArrowDtype (pa .timestamp ("ms" )))
2577
2555
result = getattr (ser .dt , method )()
2578
2556
expected = pd .Series ([exp , None ], dtype = ArrowDtype (pa .string ()))
2579
2557
tm .assert_series_equal (result , expected )
2580
2558
2581
2559
2582
2560
def test_dt_strftime (request ):
2583
- if is_platform_windows () and is_ci_environment ():
2584
- request .node .add_marker (
2585
- pytest .mark .xfail (
2586
- raises = pa .ArrowInvalid ,
2587
- reason = (
2588
- "TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
2589
- "on CI to path to the tzdata for pyarrow."
2590
- ),
2591
- )
2592
- )
2561
+ _require_timezone_database (request )
2562
+
2593
2563
ser = pd .Series (
2594
2564
[datetime (year = 2023 , month = 1 , day = 2 , hour = 3 ), None ],
2595
2565
dtype = ArrowDtype (pa .timestamp ("ns" )),
@@ -2700,16 +2670,8 @@ def test_dt_tz_localize_none():
2700
2670
2701
2671
@pytest .mark .parametrize ("unit" , ["us" , "ns" ])
2702
2672
def test_dt_tz_localize (unit , request ):
2703
- if is_platform_windows () and is_ci_environment ():
2704
- request .node .add_marker (
2705
- pytest .mark .xfail (
2706
- raises = pa .ArrowInvalid ,
2707
- reason = (
2708
- "TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
2709
- "on CI to path to the tzdata for pyarrow."
2710
- ),
2711
- )
2712
- )
2673
+ _require_timezone_database (request )
2674
+
2713
2675
ser = pd .Series (
2714
2676
[datetime (year = 2023 , month = 1 , day = 2 , hour = 3 ), None ],
2715
2677
dtype = ArrowDtype (pa .timestamp (unit )),
@@ -2731,16 +2693,8 @@ def test_dt_tz_localize(unit, request):
2731
2693
],
2732
2694
)
2733
2695
def test_dt_tz_localize_nonexistent (nonexistent , exp_date , request ):
2734
- if is_platform_windows () and is_ci_environment ():
2735
- request .node .add_marker (
2736
- pytest .mark .xfail (
2737
- raises = pa .ArrowInvalid ,
2738
- reason = (
2739
- "TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
2740
- "on CI to path to the tzdata for pyarrow."
2741
- ),
2742
- )
2743
- )
2696
+ _require_timezone_database (request )
2697
+
2744
2698
ser = pd .Series (
2745
2699
[datetime (year = 2023 , month = 3 , day = 12 , hour = 2 , minute = 30 ), None ],
2746
2700
dtype = ArrowDtype (pa .timestamp ("ns" )),
0 commit comments