Skip to content

Commit b178966

Browse files
Backport PR #51545 on branch 2.0.x (TST/CLN: Remove unnecessary pyarrow version checking) (#51595)
Backport PR #51545: TST/CLN: Remove unnecessary pyarrow version checking

Co-authored-by: Matthew Roeschke <[email protected]>
1 parent a19620e commit b178966

File tree

10 files changed

+61
-375
lines changed

10 files changed

+61
-375
lines changed

pandas/core/arrays/arrow/array.py

+4-11
Original file line number | Diff line number | Diff line change
@@ -569,14 +569,8 @@ def argsort(
569569
) -> np.ndarray:
570570
order = "ascending" if ascending else "descending"
571571
null_placement = {"last": "at_end", "first": "at_start"}.get(na_position, None)
572-
if null_placement is None or pa_version_under7p0:
573-
# Although pc.array_sort_indices exists in version 6
574-
# there's a bug that affects the pa.ChunkedArray backing
575-
# https://issues.apache.org/jira/browse/ARROW-12042
576-
fallback_performancewarning("7")
577-
return super().argsort(
578-
ascending=ascending, kind=kind, na_position=na_position
579-
)
572+
if null_placement is None:
573+
raise ValueError(f"invalid na_position: {na_position}")
580574

581575
result = pc.array_sort_indices(
582576
self._data, order=order, null_placement=null_placement
@@ -640,9 +634,8 @@ def fillna(
640634
if limit is not None:
641635
return super().fillna(value=value, method=method, limit=limit)
642636

643-
if method is not None and pa_version_under7p0:
644-
# fill_null_{forward|backward} added in pyarrow 7.0
645-
fallback_performancewarning(version="7")
637+
if method is not None:
638+
fallback_performancewarning()
646639
return super().fillna(value=value, method=method, limit=limit)
647640

648641
if is_array_like(value):

pandas/tests/base/test_value_counts.py

+6-33
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas.compat import pa_version_under7p0
8-
from pandas.errors import PerformanceWarning
9-
107
import pandas as pd
118
from pandas import (
129
DatetimeIndex,
@@ -48,16 +45,8 @@ def test_value_counts(index_or_series_obj):
4845
# TODO(GH#32514): Order of entries with the same count is inconsistent
4946
# on CI (gh-32449)
5047
if obj.duplicated().any():
51-
with tm.maybe_produces_warning(
52-
PerformanceWarning,
53-
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
54-
):
55-
result = result.sort_index()
56-
with tm.maybe_produces_warning(
57-
PerformanceWarning,
58-
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
59-
):
60-
expected = expected.sort_index()
48+
result = result.sort_index()
49+
expected = expected.sort_index()
6150
tm.assert_series_equal(result, expected)
6251

6352

@@ -97,16 +86,8 @@ def test_value_counts_null(null_obj, index_or_series_obj):
9786
if obj.duplicated().any():
9887
# TODO(GH#32514):
9988
# Order of entries with the same count is inconsistent on CI (gh-32449)
100-
with tm.maybe_produces_warning(
101-
PerformanceWarning,
102-
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
103-
):
104-
expected = expected.sort_index()
105-
with tm.maybe_produces_warning(
106-
PerformanceWarning,
107-
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
108-
):
109-
result = result.sort_index()
89+
expected = expected.sort_index()
90+
result = result.sort_index()
11091

11192
if not isinstance(result.dtype, np.dtype):
11293
# i.e IntegerDtype
@@ -119,16 +100,8 @@ def test_value_counts_null(null_obj, index_or_series_obj):
119100
if obj.duplicated().any():
120101
# TODO(GH#32514):
121102
# Order of entries with the same count is inconsistent on CI (gh-32449)
122-
with tm.maybe_produces_warning(
123-
PerformanceWarning,
124-
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
125-
):
126-
expected = expected.sort_index()
127-
with tm.maybe_produces_warning(
128-
PerformanceWarning,
129-
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
130-
):
131-
result = result.sort_index()
103+
expected = expected.sort_index()
104+
result = result.sort_index()
132105
tm.assert_series_equal(result, expected)
133106

134107

pandas/tests/extension/test_arrow.py

+14-97
Original file line number | Diff line number | Diff line change
@@ -59,10 +59,6 @@
5959

6060
from pandas.core.arrays.arrow.dtype import ArrowDtype # isort:skip
6161

62-
pytestmark = pytest.mark.filterwarnings(
63-
"ignore:.* may decrease performance. Upgrade to pyarrow >=7 to possibly"
64-
)
65-
6662

6763
@pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str)
6864
def dtype(request):
@@ -311,14 +307,7 @@ def test_from_sequence_of_strings_pa_array(self, data, request):
311307
)
312308
)
313309
elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
314-
if pa_version_under7p0:
315-
request.node.add_marker(
316-
pytest.mark.xfail(
317-
raises=pa.ArrowNotImplementedError,
318-
reason=f"pyarrow doesn't support string cast from {pa_dtype}",
319-
)
320-
)
321-
elif is_platform_windows() and is_ci_environment():
310+
if is_platform_windows() and is_ci_environment():
322311
request.node.add_marker(
323312
pytest.mark.xfail(
324313
raises=pa.ArrowInvalid,
@@ -569,23 +558,7 @@ def test_groupby_extension_transform(self, data_for_grouping, request):
569558
reason=f"{pa_dtype} only has 2 unique possible values",
570559
)
571560
)
572-
with tm.maybe_produces_warning(
573-
PerformanceWarning,
574-
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
575-
check_stacklevel=False,
576-
):
577-
super().test_groupby_extension_transform(data_for_grouping)
578-
579-
def test_groupby_extension_apply(
580-
self, data_for_grouping, groupby_apply_op, request
581-
):
582-
pa_dtype = data_for_grouping.dtype.pyarrow_dtype
583-
with tm.maybe_produces_warning(
584-
PerformanceWarning,
585-
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
586-
check_stacklevel=False,
587-
):
588-
super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
561+
super().test_groupby_extension_transform(data_for_grouping)
589562

590563
@pytest.mark.parametrize("as_index", [True, False])
591564
def test_groupby_extension_agg(self, as_index, data_for_grouping, request):
@@ -597,12 +570,7 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping, request):
597570
reason=f"{pa_dtype} only has 2 unique possible values",
598571
)
599572
)
600-
with tm.maybe_produces_warning(
601-
PerformanceWarning,
602-
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
603-
check_stacklevel=False,
604-
):
605-
super().test_groupby_extension_agg(as_index, data_for_grouping)
573+
super().test_groupby_extension_agg(as_index, data_for_grouping)
606574

607575
def test_in_numeric_groupby(self, data_for_grouping):
608576
if is_string_dtype(data_for_grouping.dtype):
@@ -720,14 +688,20 @@ def test_view(self, data):
720688

721689
class TestBaseMissing(base.BaseMissingTests):
722690
def test_fillna_no_op_returns_copy(self, data):
723-
with tm.maybe_produces_warning(
724-
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
725-
):
726-
super().test_fillna_no_op_returns_copy(data)
691+
data = data[~data.isna()]
692+
693+
valid = data[0]
694+
result = data.fillna(valid)
695+
assert result is not data
696+
self.assert_extension_array_equal(result, data)
697+
with tm.assert_produces_warning(PerformanceWarning):
698+
result = data.fillna(method="backfill")
699+
assert result is not data
700+
self.assert_extension_array_equal(result, data)
727701

728702
def test_fillna_series_method(self, data_missing, fillna_method):
729703
with tm.maybe_produces_warning(
730-
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
704+
PerformanceWarning, fillna_method is not None, check_stacklevel=False
731705
):
732706
super().test_fillna_series_method(data_missing, fillna_method)
733707

@@ -805,12 +779,6 @@ def test_invert(self, data, request):
805779

806780

807781
class TestBaseMethods(base.BaseMethodsTests):
808-
def test_argsort_missing_array(self, data_missing_for_sorting):
809-
with tm.maybe_produces_warning(
810-
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
811-
):
812-
super().test_argsort_missing_array(data_missing_for_sorting)
813-
814782
@pytest.mark.parametrize("periods", [1, -2])
815783
def test_diff(self, data, periods, request):
816784
pa_dtype = data.dtype.pyarrow_dtype
@@ -825,20 +793,10 @@ def test_diff(self, data, periods, request):
825793
)
826794
super().test_diff(data, periods)
827795

828-
@pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
829796
@pytest.mark.parametrize("dropna", [True, False])
830797
def test_value_counts(self, all_data, dropna, request):
831798
super().test_value_counts(all_data, dropna)
832799

833-
def test_value_counts_with_normalize(self, data, request):
834-
pa_dtype = data.dtype.pyarrow_dtype
835-
with tm.maybe_produces_warning(
836-
PerformanceWarning,
837-
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
838-
check_stacklevel=False,
839-
):
840-
super().test_value_counts_with_normalize(data)
841-
842800
def test_argmin_argmax(
843801
self, data_for_sorting, data_missing_for_sorting, na_value, request
844802
):
@@ -886,47 +844,6 @@ def test_argreduce_series(
886844
data_missing_for_sorting, op_name, skipna, expected
887845
)
888846

889-
@pytest.mark.parametrize(
890-
"na_position, expected",
891-
[
892-
("last", np.array([2, 0, 1], dtype=np.dtype("intp"))),
893-
("first", np.array([1, 2, 0], dtype=np.dtype("intp"))),
894-
],
895-
)
896-
def test_nargsort(self, data_missing_for_sorting, na_position, expected):
897-
with tm.maybe_produces_warning(
898-
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
899-
):
900-
super().test_nargsort(data_missing_for_sorting, na_position, expected)
901-
902-
@pytest.mark.parametrize("ascending", [True, False])
903-
def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request):
904-
with tm.maybe_produces_warning(
905-
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
906-
):
907-
super().test_sort_values(data_for_sorting, ascending, sort_by_key)
908-
909-
@pytest.mark.parametrize("ascending", [True, False])
910-
def test_sort_values_missing(
911-
self, data_missing_for_sorting, ascending, sort_by_key
912-
):
913-
with tm.maybe_produces_warning(
914-
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
915-
):
916-
super().test_sort_values_missing(
917-
data_missing_for_sorting, ascending, sort_by_key
918-
)
919-
920-
@pytest.mark.parametrize("ascending", [True, False])
921-
def test_sort_values_frame(self, data_for_sorting, ascending, request):
922-
pa_dtype = data_for_sorting.dtype.pyarrow_dtype
923-
with tm.maybe_produces_warning(
924-
PerformanceWarning,
925-
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
926-
check_stacklevel=False,
927-
):
928-
super().test_sort_values_frame(data_for_sorting, ascending)
929-
930847
def test_factorize(self, data_for_grouping, request):
931848
pa_dtype = data_for_grouping.dtype.pyarrow_dtype
932849
if pa.types.is_boolean(pa_dtype):

0 commit comments

Comments
 (0)