Skip to content

Commit 8baedc1

Browse files
authored
TST/CLN: Remove unnecessary pyarrow version checking (#51545)
* cleanups
* Add back some checks
* Unneeded warning checking
* fix fillna fallback warnings
* Remove tm.assert_produces_warning
1 parent 9935690 commit 8baedc1

File tree

10 files changed

+61
-375
lines changed

10 files changed

+61
-375
lines changed

pandas/core/arrays/arrow/array.py

+4 -11
Original file line number | Diff line number | Diff line change
@@ -569,14 +569,8 @@ def argsort(
569569
) -> np.ndarray:
570570
order = "ascending" if ascending else "descending"
571571
null_placement = {"last": "at_end", "first": "at_start"}.get(na_position, None)
572-
if null_placement is None or pa_version_under7p0:
573-
# Although pc.array_sort_indices exists in version 6
574-
# there's a bug that affects the pa.ChunkedArray backing
575-
# https://issues.apache.org/jira/browse/ARROW-12042
576-
fallback_performancewarning("7")
577-
return super().argsort(
578-
ascending=ascending, kind=kind, na_position=na_position
579-
)
572+
if null_placement is None:
573+
raise ValueError(f"invalid na_position: {na_position}")
580574

581575
result = pc.array_sort_indices(
582576
self._data, order=order, null_placement=null_placement
@@ -640,9 +634,8 @@ def fillna(
640634
if limit is not None:
641635
return super().fillna(value=value, method=method, limit=limit)
642636

643-
if method is not None and pa_version_under7p0:
644-
# fill_null_{forward|backward} added in pyarrow 7.0
645-
fallback_performancewarning(version="7")
637+
if method is not None:
638+
fallback_performancewarning()
646639
return super().fillna(value=value, method=method, limit=limit)
647640

648641
if is_array_like(value):

pandas/tests/base/test_value_counts.py

+6 -33
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas.compat import pa_version_under7p0
8-
from pandas.errors import PerformanceWarning
9-
107
import pandas as pd
118
from pandas import (
129
DatetimeIndex,
@@ -48,16 +45,8 @@ def test_value_counts(index_or_series_obj):
4845
# TODO(GH#32514): Order of entries with the same count is inconsistent
4946
# on CI (gh-32449)
5047
if obj.duplicated().any():
51-
with tm.maybe_produces_warning(
52-
PerformanceWarning,
53-
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
54-
):
55-
result = result.sort_index()
56-
with tm.maybe_produces_warning(
57-
PerformanceWarning,
58-
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
59-
):
60-
expected = expected.sort_index()
48+
result = result.sort_index()
49+
expected = expected.sort_index()
6150
tm.assert_series_equal(result, expected)
6251

6352

@@ -97,16 +86,8 @@ def test_value_counts_null(null_obj, index_or_series_obj):
9786
if obj.duplicated().any():
9887
# TODO(GH#32514):
9988
# Order of entries with the same count is inconsistent on CI (gh-32449)
100-
with tm.maybe_produces_warning(
101-
PerformanceWarning,
102-
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
103-
):
104-
expected = expected.sort_index()
105-
with tm.maybe_produces_warning(
106-
PerformanceWarning,
107-
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
108-
):
109-
result = result.sort_index()
89+
expected = expected.sort_index()
90+
result = result.sort_index()
11091

11192
if not isinstance(result.dtype, np.dtype):
11293
# i.e IntegerDtype
@@ -119,16 +100,8 @@ def test_value_counts_null(null_obj, index_or_series_obj):
119100
if obj.duplicated().any():
120101
# TODO(GH#32514):
121102
# Order of entries with the same count is inconsistent on CI (gh-32449)
122-
with tm.maybe_produces_warning(
123-
PerformanceWarning,
124-
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
125-
):
126-
expected = expected.sort_index()
127-
with tm.maybe_produces_warning(
128-
PerformanceWarning,
129-
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
130-
):
131-
result = result.sort_index()
103+
expected = expected.sort_index()
104+
result = result.sort_index()
132105
tm.assert_series_equal(result, expected)
133106

134107

pandas/tests/extension/test_arrow.py

+14 -97
Original file line number | Diff line number | Diff line change
@@ -59,10 +59,6 @@
5959

6060
from pandas.core.arrays.arrow.dtype import ArrowDtype # isort:skip
6161

62-
pytestmark = pytest.mark.filterwarnings(
63-
"ignore:.* may decrease performance. Upgrade to pyarrow >=7 to possibly"
64-
)
65-
6662

6763
@pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str)
6864
def dtype(request):
@@ -311,14 +307,7 @@ def test_from_sequence_of_strings_pa_array(self, data, request):
311307
)
312308
)
313309
elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
314-
if pa_version_under7p0:
315-
request.node.add_marker(
316-
pytest.mark.xfail(
317-
raises=pa.ArrowNotImplementedError,
318-
reason=f"pyarrow doesn't support string cast from {pa_dtype}",
319-
)
320-
)
321-
elif is_platform_windows() and is_ci_environment():
310+
if is_platform_windows() and is_ci_environment():
322311
request.node.add_marker(
323312
pytest.mark.xfail(
324313
raises=pa.ArrowInvalid,
@@ -561,23 +550,7 @@ def test_groupby_extension_transform(self, data_for_grouping, request):
561550
reason=f"{pa_dtype} only has 2 unique possible values",
562551
)
563552
)
564-
with tm.maybe_produces_warning(
565-
PerformanceWarning,
566-
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
567-
check_stacklevel=False,
568-
):
569-
super().test_groupby_extension_transform(data_for_grouping)
570-
571-
def test_groupby_extension_apply(
572-
self, data_for_grouping, groupby_apply_op, request
573-
):
574-
pa_dtype = data_for_grouping.dtype.pyarrow_dtype
575-
with tm.maybe_produces_warning(
576-
PerformanceWarning,
577-
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
578-
check_stacklevel=False,
579-
):
580-
super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
553+
super().test_groupby_extension_transform(data_for_grouping)
581554

582555
@pytest.mark.parametrize("as_index", [True, False])
583556
def test_groupby_extension_agg(self, as_index, data_for_grouping, request):
@@ -589,12 +562,7 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping, request):
589562
reason=f"{pa_dtype} only has 2 unique possible values",
590563
)
591564
)
592-
with tm.maybe_produces_warning(
593-
PerformanceWarning,
594-
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
595-
check_stacklevel=False,
596-
):
597-
super().test_groupby_extension_agg(as_index, data_for_grouping)
565+
super().test_groupby_extension_agg(as_index, data_for_grouping)
598566

599567
def test_in_numeric_groupby(self, data_for_grouping):
600568
if is_string_dtype(data_for_grouping.dtype):
@@ -712,14 +680,20 @@ def test_view(self, data):
712680

713681
class TestBaseMissing(base.BaseMissingTests):
714682
def test_fillna_no_op_returns_copy(self, data):
715-
with tm.maybe_produces_warning(
716-
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
717-
):
718-
super().test_fillna_no_op_returns_copy(data)
683+
data = data[~data.isna()]
684+
685+
valid = data[0]
686+
result = data.fillna(valid)
687+
assert result is not data
688+
self.assert_extension_array_equal(result, data)
689+
with tm.assert_produces_warning(PerformanceWarning):
690+
result = data.fillna(method="backfill")
691+
assert result is not data
692+
self.assert_extension_array_equal(result, data)
719693

720694
def test_fillna_series_method(self, data_missing, fillna_method):
721695
with tm.maybe_produces_warning(
722-
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
696+
PerformanceWarning, fillna_method is not None, check_stacklevel=False
723697
):
724698
super().test_fillna_series_method(data_missing, fillna_method)
725699

@@ -797,12 +771,6 @@ def test_invert(self, data, request):
797771

798772

799773
class TestBaseMethods(base.BaseMethodsTests):
800-
def test_argsort_missing_array(self, data_missing_for_sorting):
801-
with tm.maybe_produces_warning(
802-
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
803-
):
804-
super().test_argsort_missing_array(data_missing_for_sorting)
805-
806774
@pytest.mark.parametrize("periods", [1, -2])
807775
def test_diff(self, data, periods, request):
808776
pa_dtype = data.dtype.pyarrow_dtype
@@ -817,20 +785,10 @@ def test_diff(self, data, periods, request):
817785
)
818786
super().test_diff(data, periods)
819787

820-
@pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
821788
@pytest.mark.parametrize("dropna", [True, False])
822789
def test_value_counts(self, all_data, dropna, request):
823790
super().test_value_counts(all_data, dropna)
824791

825-
def test_value_counts_with_normalize(self, data, request):
826-
pa_dtype = data.dtype.pyarrow_dtype
827-
with tm.maybe_produces_warning(
828-
PerformanceWarning,
829-
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
830-
check_stacklevel=False,
831-
):
832-
super().test_value_counts_with_normalize(data)
833-
834792
def test_argmin_argmax(
835793
self, data_for_sorting, data_missing_for_sorting, na_value, request
836794
):
@@ -878,47 +836,6 @@ def test_argreduce_series(
878836
data_missing_for_sorting, op_name, skipna, expected
879837
)
880838

881-
@pytest.mark.parametrize(
882-
"na_position, expected",
883-
[
884-
("last", np.array([2, 0, 1], dtype=np.dtype("intp"))),
885-
("first", np.array([1, 2, 0], dtype=np.dtype("intp"))),
886-
],
887-
)
888-
def test_nargsort(self, data_missing_for_sorting, na_position, expected):
889-
with tm.maybe_produces_warning(
890-
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
891-
):
892-
super().test_nargsort(data_missing_for_sorting, na_position, expected)
893-
894-
@pytest.mark.parametrize("ascending", [True, False])
895-
def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request):
896-
with tm.maybe_produces_warning(
897-
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
898-
):
899-
super().test_sort_values(data_for_sorting, ascending, sort_by_key)
900-
901-
@pytest.mark.parametrize("ascending", [True, False])
902-
def test_sort_values_missing(
903-
self, data_missing_for_sorting, ascending, sort_by_key
904-
):
905-
with tm.maybe_produces_warning(
906-
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
907-
):
908-
super().test_sort_values_missing(
909-
data_missing_for_sorting, ascending, sort_by_key
910-
)
911-
912-
@pytest.mark.parametrize("ascending", [True, False])
913-
def test_sort_values_frame(self, data_for_sorting, ascending, request):
914-
pa_dtype = data_for_sorting.dtype.pyarrow_dtype
915-
with tm.maybe_produces_warning(
916-
PerformanceWarning,
917-
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
918-
check_stacklevel=False,
919-
):
920-
super().test_sort_values_frame(data_for_sorting, ascending)
921-
922839
def test_factorize(self, data_for_grouping, request):
923840
pa_dtype = data_for_grouping.dtype.pyarrow_dtype
924841
if pa.types.is_boolean(pa_dtype):

0 commit comments

Comments
 (0)