Skip to content

Commit e6a014f

Browse files
phofl and jbrockmendel authored
Backport PR #48443 on branch 1.5.x (BUG: Fix pyarrow groupby tests) (#48494)
* BUG: Fix pyarrow groupby tests (#48443)

  # Conflicts:
  #   pandas/tests/extension/test_arrow.py

* CI: Fix failing tests (#48493)

Co-authored-by: jbrockmendel <[email protected]>
1 parent ad087f5 commit e6a014f

File tree

2 files changed

+15
-50
lines changed

2 files changed

+15
-50
lines changed

pandas/core/series.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,10 @@ def _set_axis(self, axis: int, labels: AnyArrayLike | list) -> None:
572572
"""
573573
labels = ensure_index(labels)
574574

575-
if labels._is_all_dates:
575+
if labels._is_all_dates and not (
576+
type(labels) is Index and not isinstance(labels.dtype, np.dtype)
577+
):
578+
# exclude e.g. timestamp[ns][pyarrow] dtype from this casting
576579
deep_labels = labels
577580
if isinstance(labels, CategoricalIndex):
578581
deep_labels = labels.categories

pandas/tests/extension/test_arrow.py

+11-49
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
pa_version_under8p0,
3232
pa_version_under9p0,
3333
)
34+
from pandas.errors import PerformanceWarning
3435

3536
import pandas as pd
3637
import pandas._testing as tm
@@ -515,15 +516,6 @@ def test_groupby_extension_no_sort(self, data_for_grouping, request):
515516
reason=f"pyarrow doesn't support factorizing {pa_dtype}",
516517
)
517518
)
518-
elif pa.types.is_date(pa_dtype) or (
519-
pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None
520-
):
521-
request.node.add_marker(
522-
pytest.mark.xfail(
523-
raises=AttributeError,
524-
reason="GH 34986",
525-
)
526-
)
527519
super().test_groupby_extension_no_sort(data_for_grouping)
528520

529521
def test_groupby_extension_transform(self, data_for_grouping, request):
@@ -547,8 +539,7 @@ def test_groupby_extension_apply(
547539
self, data_for_grouping, groupby_apply_op, request
548540
):
549541
pa_dtype = data_for_grouping.dtype.pyarrow_dtype
550-
# Is there a better way to get the "series" ID for groupby_apply_op?
551-
is_series = "series" in request.node.nodeid
542+
# TODO: Is there a better way to get the "object" ID for groupby_apply_op?
552543
is_object = "object" in request.node.nodeid
553544
if pa.types.is_duration(pa_dtype):
554545
request.node.add_marker(
@@ -567,14 +558,10 @@ def test_groupby_extension_apply(
567558
reason="GH 47514: _concat_datetime expects axis arg.",
568559
)
569560
)
570-
elif not is_series:
571-
request.node.add_marker(
572-
pytest.mark.xfail(
573-
raises=AttributeError,
574-
reason="GH 34986",
575-
)
576-
)
577-
super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
561+
with tm.maybe_produces_warning(
562+
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
563+
):
564+
super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
578565

579566
def test_in_numeric_groupby(self, data_for_grouping, request):
580567
pa_dtype = data_for_grouping.dtype.pyarrow_dtype
@@ -603,17 +590,10 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping, request):
603590
reason=f"pyarrow doesn't support factorizing {pa_dtype}",
604591
)
605592
)
606-
elif as_index is True and (
607-
pa.types.is_date(pa_dtype)
608-
or (pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None)
593+
with tm.maybe_produces_warning(
594+
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
609595
):
610-
request.node.add_marker(
611-
pytest.mark.xfail(
612-
raises=AttributeError,
613-
reason="GH 34986",
614-
)
615-
)
616-
super().test_groupby_extension_agg(as_index, data_for_grouping)
596+
super().test_groupby_extension_agg(as_index, data_for_grouping)
617597

618598

619599
class TestBaseDtype(base.BaseDtypeTests):
@@ -1443,16 +1423,7 @@ def test_diff(self, data, periods, request):
14431423
@pytest.mark.parametrize("dropna", [True, False])
14441424
def test_value_counts(self, all_data, dropna, request):
14451425
pa_dtype = all_data.dtype.pyarrow_dtype
1446-
if pa.types.is_date(pa_dtype) or (
1447-
pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None
1448-
):
1449-
request.node.add_marker(
1450-
pytest.mark.xfail(
1451-
raises=AttributeError,
1452-
reason="GH 34986",
1453-
)
1454-
)
1455-
elif pa.types.is_duration(pa_dtype):
1426+
if pa.types.is_duration(pa_dtype):
14561427
request.node.add_marker(
14571428
pytest.mark.xfail(
14581429
raises=pa.ArrowNotImplementedError,
@@ -1463,16 +1434,7 @@ def test_value_counts(self, all_data, dropna, request):
14631434

14641435
def test_value_counts_with_normalize(self, data, request):
14651436
pa_dtype = data.dtype.pyarrow_dtype
1466-
if pa.types.is_date(pa_dtype) or (
1467-
pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is None
1468-
):
1469-
request.node.add_marker(
1470-
pytest.mark.xfail(
1471-
raises=AttributeError,
1472-
reason="GH 34986",
1473-
)
1474-
)
1475-
elif pa.types.is_duration(pa_dtype):
1437+
if pa.types.is_duration(pa_dtype):
14761438
request.node.add_marker(
14771439
pytest.mark.xfail(
14781440
raises=pa.ArrowNotImplementedError,

0 commit comments

Comments
 (0)