TST: Filter/catch pyarrow PerformanceWarnings (#48208)

mroeschke · web-flow · commit 1b2cc268e485 · 2022-08-26T23:38:05.000+02:00
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -25,9 +25,11 @@
     pa_version_under3p0,
     pa_version_under4p0,
     pa_version_under6p0,
+    pa_version_under7p0,
     pa_version_under8p0,
     pa_version_under9p0,
 )
+from pandas.errors import PerformanceWarning
 
 import pandas as pd
 import pandas._testing as tm
@@ -446,7 +448,10 @@ def test_groupby_extension_transform(self, data_for_grouping, request):
                     reason=f"pyarrow doesn't support factorizing {pa_dtype}",
                 )
             )
-        super().test_groupby_extension_transform(data_for_grouping)
+        with tm.maybe_produces_warning(
+            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
+        ):
+            super().test_groupby_extension_transform(data_for_grouping)
 
     def test_groupby_extension_apply(
         self, data_for_grouping, groupby_apply_op, request
@@ -479,7 +484,10 @@ def test_groupby_extension_apply(
                         reason="GH 34986",
                     )
                 )
-        super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
+        with tm.maybe_produces_warning(
+            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
+        ):
+            super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
 
     def test_in_numeric_groupby(self, data_for_grouping, request):
         pa_dtype = data_for_grouping.dtype.pyarrow_dtype
@@ -518,7 +526,10 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping, request):
                     reason="GH 34986",
                 )
             )
-        super().test_groupby_extension_agg(as_index, data_for_grouping)
+        with tm.maybe_produces_warning(
+            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
+        ):
+            super().test_groupby_extension_agg(as_index, data_for_grouping)
 
 
 class TestBaseDtype(base.BaseDtypeTests):
@@ -607,6 +618,10 @@ def test_view(self, data):
 
 
 class TestBaseMissing(base.BaseMissingTests):
+    @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
+    def test_dropna_array(self, data_missing):
+        super().test_dropna_array(data_missing)
+
     def test_fillna_limit_pad(self, data_missing, using_array_manager, request):
         if using_array_manager and pa.types.is_duration(
             data_missing.dtype.pyarrow_dtype
@@ -1331,6 +1346,12 @@ def test_invert(self, data, request):
 
 
 class TestBaseMethods(base.BaseMethodsTests):
+    def test_argsort_missing_array(self, data_missing_for_sorting):
+        with tm.maybe_produces_warning(
+            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
+        ):
+            super().test_argsort_missing_array(data_missing_for_sorting)
+
     @pytest.mark.parametrize("periods", [1, -2])
     def test_diff(self, data, periods, request):
         pa_dtype = data.dtype.pyarrow_dtype
@@ -1345,6 +1366,7 @@ def test_diff(self, data, periods, request):
             )
         super().test_diff(data, periods)
 
+    @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
     @pytest.mark.parametrize("dropna", [True, False])
     def test_value_counts(self, all_data, dropna, request):
         pa_dtype = all_data.dtype.pyarrow_dtype
@@ -1384,7 +1406,10 @@ def test_value_counts_with_normalize(self, data, request):
                     reason=f"value_count has no pyarrow kernel for {pa_dtype}",
                 )
             )
-        super().test_value_counts_with_normalize(data)
+        with tm.maybe_produces_warning(
+            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
+        ):
+            super().test_value_counts_with_normalize(data)
 
     @pytest.mark.xfail(
         pa_version_under6p0,
@@ -1445,6 +1470,19 @@ def test_argreduce_series(
             data_missing_for_sorting, op_name, skipna, expected
         )
 
+    @pytest.mark.parametrize(
+        "na_position, expected",
+        [
+            ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))),
+            ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))),
+        ],
+    )
+    def test_nargsort(self, data_missing_for_sorting, na_position, expected):
+        with tm.maybe_produces_warning(
+            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
+        ):
+            super().test_nargsort(data_missing_for_sorting, na_position, expected)
+
     @pytest.mark.parametrize("ascending", [True, False])
     def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request):
         pa_dtype = data_for_sorting.dtype.pyarrow_dtype
@@ -1458,7 +1496,21 @@ def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request):
                     ),
                 )
             )
-        super().test_sort_values(data_for_sorting, ascending, sort_by_key)
+        with tm.maybe_produces_warning(
+            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
+        ):
+            super().test_sort_values(data_for_sorting, ascending, sort_by_key)
+
+    @pytest.mark.parametrize("ascending", [True, False])
+    def test_sort_values_missing(
+        self, data_missing_for_sorting, ascending, sort_by_key
+    ):
+        with tm.maybe_produces_warning(
+            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
+        ):
+            super().test_sort_values_missing(
+                data_missing_for_sorting, ascending, sort_by_key
+            )
 
     @pytest.mark.parametrize("ascending", [True, False])
     def test_sort_values_frame(self, data_for_sorting, ascending, request):
@@ -1473,7 +1525,10 @@ def test_sort_values_frame(self, data_for_sorting, ascending, request):
                     ),
                 )
             )
-        super().test_sort_values_frame(data_for_sorting, ascending)
+        with tm.maybe_produces_warning(
+            PerformanceWarning, pa_version_under7p0, check_stacklevel=False
+        ):
+            super().test_sort_values_frame(data_for_sorting, ascending)
 
     @pytest.mark.parametrize("box", [pd.Series, lambda x: x])
     @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique])
diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
@@ -285,7 +285,7 @@ def test_memory_usage(self, index):
         if index.inferred_type == "object":
             assert result3 > result2
 
-    def test_argsort(self, request, index):
+    def test_argsort(self, index):
         # separately tested
         if isinstance(index, CategoricalIndex):
             return
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -8,8 +8,14 @@
 import numpy as np
 import pytest
 
-from pandas.compat import IS64
-from pandas.errors import InvalidIndexError
+from pandas.compat import (
+    IS64,
+    pa_version_under7p0,
+)
+from pandas.errors import (
+    InvalidIndexError,
+    PerformanceWarning,
+)
 from pandas.util._test_decorators import async_mark
 
 import pandas as pd
@@ -62,6 +68,22 @@ def test_new_axis(self, index):
         assert new_index.ndim == 2
         assert isinstance(new_index, np.ndarray)
 
+    def test_argsort(self, index):
+        with tm.maybe_produces_warning(
+            PerformanceWarning,
+            pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
+            check_stacklevel=False,
+        ):
+            super().test_argsort(index)
+
+    def test_numpy_argsort(self, index):
+        with tm.maybe_produces_warning(
+            PerformanceWarning,
+            pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
+            check_stacklevel=False,
+        ):
+            super().test_numpy_argsort(index)
+
     def test_constructor_regular(self, index):
         tm.assert_contains_all(index, index)
 
diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
@@ -12,6 +12,7 @@
     IS64,
     pa_version_under7p0,
 )
+from pandas.errors import PerformanceWarning
 
 from pandas.core.dtypes.common import is_integer_dtype
 
@@ -169,7 +170,12 @@ def test_copy_name(self, index_flat):
         s1 = pd.Series(2, index=first)
         s2 = pd.Series(3, index=second[:-1])
         # See GH#13365
-        s3 = s1 * s2
+        with tm.maybe_produces_warning(
+            PerformanceWarning,
+            pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
+            check_stacklevel=False,
+        ):
+            s3 = s1 * s2
         assert s3.index.name == "mario"
 
     def test_copy_name2(self, index_flat):
@@ -460,9 +466,14 @@ def test_hasnans_isnans(self, index_flat):
 
 @pytest.mark.parametrize("na_position", [None, "middle"])
 def test_sort_values_invalid_na_position(index_with_missing, na_position):
-
-    with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
-        index_with_missing.sort_values(na_position=na_position)
+    with tm.maybe_produces_warning(
+        PerformanceWarning,
+        pa_version_under7p0
+        and getattr(index_with_missing.dtype, "storage", "") == "pyarrow",
+        check_stacklevel=False,
+    ):
+        with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
+            index_with_missing.sort_values(na_position=na_position)
 
 
 @pytest.mark.parametrize("na_position", ["first", "last"])
@@ -488,7 +499,13 @@ def test_sort_values_with_missing(index_with_missing, na_position, request):
     # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray
     expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype)
 
-    result = index_with_missing.sort_values(na_position=na_position)
+    with tm.maybe_produces_warning(
+        PerformanceWarning,
+        pa_version_under7p0
+        and getattr(index_with_missing.dtype, "storage", "") == "pyarrow",
+        check_stacklevel=False,
+    ):
+        result = index_with_missing.sort_values(na_position=na_position)
     tm.assert_index_equal(result, expected)
 
 
diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
@@ -9,6 +9,7 @@
 import pytest
 
 from pandas.compat import pa_version_under7p0
+from pandas.errors import PerformanceWarning
 
 from pandas.core.dtypes.cast import find_common_type
 
@@ -38,8 +39,18 @@
 def test_union_same_types(index):
     # Union with a non-unique, non-monotonic index raises error
     # Only needed for bool index factory
-    idx1 = index.sort_values()
-    idx2 = index.sort_values()
+    with tm.maybe_produces_warning(
+        PerformanceWarning,
+        pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
+        check_stacklevel=False,
+    ):
+        idx1 = index.sort_values()
+    with tm.maybe_produces_warning(
+        PerformanceWarning,
+        pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
+        check_stacklevel=False,
+    ):
+        idx2 = index.sort_values()
     assert idx1.union(idx2).dtype == idx1.dtype
 
 
@@ -98,8 +109,18 @@ def test_union_different_types(index_flat, index_flat2, request):
 
     # Union with a non-unique, non-monotonic index raises error
     # This applies to the boolean index
-    idx1 = idx1.sort_values()
-    idx2 = idx2.sort_values()
+    with tm.maybe_produces_warning(
+        PerformanceWarning,
+        pa_version_under7p0 and getattr(idx1.dtype, "storage", "") == "pyarrow",
+        check_stacklevel=False,
+    ):
+        idx1 = idx1.sort_values()
+    with tm.maybe_produces_warning(
+        PerformanceWarning,
+        pa_version_under7p0 and getattr(idx2.dtype, "storage", "") == "pyarrow",
+        check_stacklevel=False,
+    ):
+        idx2 = idx2.sort_values()
 
     with tm.assert_produces_warning(warn, match="'<' not supported between"):
         res1 = idx1.union(idx2)
@@ -231,6 +252,9 @@ def test_intersection_base(self, index):
             with pytest.raises(TypeError, match=msg):
                 first.intersection([1, 2, 3])
 
+    @pytest.mark.filterwarnings(
+        "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
+    )
     def test_union_base(self, index):
         first = index[3:]
         second = index[:5]
@@ -255,6 +279,9 @@ def test_union_base(self, index):
             with pytest.raises(TypeError, match=msg):
                 first.union([1, 2, 3])
 
+    @pytest.mark.filterwarnings(
+        "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
+    )
     def test_difference_base(self, sort, index):
         first = index[2:]
         second = index[:4]
@@ -280,6 +307,9 @@ def test_difference_base(self, sort, index):
             with pytest.raises(TypeError, match=msg):
                 first.difference([1, 2, 3], sort)
 
+    @pytest.mark.filterwarnings(
+        "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
+    )
     def test_symmetric_difference(self, index):
         if isinstance(index, CategoricalIndex):
             return
@@ -371,8 +401,18 @@ def test_union_unequal(self, index_flat, fname, sname, expected_name):
         # test copy.union(subset) - need sort for unicode and string
         first = index.copy().set_names(fname)
         second = index[1:].set_names(sname)
-        union = first.union(second).sort_values()
-        expected = index.set_names(expected_name).sort_values()
+        with tm.maybe_produces_warning(
+            PerformanceWarning,
+            pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
+            check_stacklevel=False,
+        ):
+            union = first.union(second).sort_values()
+        with tm.maybe_produces_warning(
+            PerformanceWarning,
+            pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
+            check_stacklevel=False,
+        ):
+            expected = index.set_names(expected_name).sort_values()
         tm.assert_index_equal(union, expected)
 
     @pytest.mark.parametrize(
@@ -438,8 +478,18 @@ def test_intersect_unequal(self, index_flat, fname, sname, expected_name):
         # test copy.intersection(subset) - need sort for unicode and string
         first = index.copy().set_names(fname)
         second = index[1:].set_names(sname)
-        intersect = first.intersection(second).sort_values()
-        expected = index[1:].set_names(expected_name).sort_values()
+        with tm.maybe_produces_warning(
+            PerformanceWarning,
+            pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
+            check_stacklevel=False,
+        ):
+            intersect = first.intersection(second).sort_values()
+        with tm.maybe_produces_warning(
+            PerformanceWarning,
+            pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
+            check_stacklevel=False,
+        ):
+            expected = index[1:].set_names(expected_name).sort_values()
         tm.assert_index_equal(intersect, expected)
 
     def test_intersection_name_retention_with_nameless(self, index):
@@ -495,6 +545,9 @@ def test_intersection_difference_match_empty(self, index, sort):
         tm.assert_index_equal(inter, diff, exact=True)
 
 
+@pytest.mark.filterwarnings(
+    "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
+)
 @pytest.mark.parametrize(
     "method", ["intersection", "union", "difference", "symmetric_difference"]
 )