diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 43c52ef8848e2..4c22487a3affe 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -25,9 +25,11 @@ pa_version_under3p0, pa_version_under4p0, pa_version_under6p0, + pa_version_under7p0, pa_version_under8p0, pa_version_under9p0, ) +from pandas.errors import PerformanceWarning import pandas as pd import pandas._testing as tm @@ -446,7 +448,10 @@ def test_groupby_extension_transform(self, data_for_grouping, request): reason=f"pyarrow doesn't support factorizing {pa_dtype}", ) ) - super().test_groupby_extension_transform(data_for_grouping) + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under7p0, check_stacklevel=False + ): + super().test_groupby_extension_transform(data_for_grouping) def test_groupby_extension_apply( self, data_for_grouping, groupby_apply_op, request @@ -479,7 +484,10 @@ def test_groupby_extension_apply( reason="GH 34986", ) ) - super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under7p0, check_stacklevel=False + ): + super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) def test_in_numeric_groupby(self, data_for_grouping, request): pa_dtype = data_for_grouping.dtype.pyarrow_dtype @@ -518,7 +526,10 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping, request): reason="GH 34986", ) ) - super().test_groupby_extension_agg(as_index, data_for_grouping) + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under7p0, check_stacklevel=False + ): + super().test_groupby_extension_agg(as_index, data_for_grouping) class TestBaseDtype(base.BaseDtypeTests): @@ -607,6 +618,10 @@ def test_view(self, data): class TestBaseMissing(base.BaseMissingTests): + @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning") + def test_dropna_array(self, data_missing): + super().test_dropna_array(data_missing) + def test_fillna_limit_pad(self, data_missing, using_array_manager, request): if using_array_manager and pa.types.is_duration( data_missing.dtype.pyarrow_dtype @@ -1331,6 +1346,12 @@ def test_invert(self, data, request): class TestBaseMethods(base.BaseMethodsTests): + def test_argsort_missing_array(self, data_missing_for_sorting): + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under7p0, check_stacklevel=False + ): + super().test_argsort_missing_array(data_missing_for_sorting) + @pytest.mark.parametrize("periods", [1, -2]) def test_diff(self, data, periods, request): pa_dtype = data.dtype.pyarrow_dtype @@ -1345,6 +1366,7 @@ def test_diff(self, data, periods, request): ) super().test_diff(data, periods) + @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning") @pytest.mark.parametrize("dropna", [True, False]) def test_value_counts(self, all_data, dropna, request): pa_dtype = all_data.dtype.pyarrow_dtype @@ -1384,7 +1406,10 @@ def test_value_counts_with_normalize(self, data, request): reason=f"value_count has no pyarrow kernel for {pa_dtype}", ) ) - super().test_value_counts_with_normalize(data) + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under7p0, check_stacklevel=False + ): + super().test_value_counts_with_normalize(data) @pytest.mark.xfail( pa_version_under6p0, @@ -1445,6 +1470,19 @@ def test_argreduce_series( data_missing_for_sorting, op_name, skipna, expected ) + @pytest.mark.parametrize( + "na_position, expected", + [ + ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))), + ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))), + ], + ) + def test_nargsort(self, data_missing_for_sorting, na_position, expected): + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under7p0, check_stacklevel=False + ): + super().test_nargsort(data_missing_for_sorting, na_position, expected) + @pytest.mark.parametrize("ascending", [True, False]) def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request): pa_dtype = data_for_sorting.dtype.pyarrow_dtype @@ -1458,7 +1496,21 @@ def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request): ), ) ) - super().test_sort_values(data_for_sorting, ascending, sort_by_key) + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under7p0, check_stacklevel=False + ): + super().test_sort_values(data_for_sorting, ascending, sort_by_key) + + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_missing( + self, data_missing_for_sorting, ascending, sort_by_key + ): + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under7p0, check_stacklevel=False + ): + super().test_sort_values_missing( + data_missing_for_sorting, ascending, sort_by_key + ) @pytest.mark.parametrize("ascending", [True, False]) def test_sort_values_frame(self, data_for_sorting, ascending, request): @@ -1473,7 +1525,10 @@ def test_sort_values_frame(self, data_for_sorting, ascending, request): ), ) ) - super().test_sort_values_frame(data_for_sorting, ascending) + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under7p0, check_stacklevel=False + ): + super().test_sort_values_frame(data_for_sorting, ascending) @pytest.mark.parametrize("box", [pd.Series, lambda x: x]) @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 3f8c679c6162f..85833224fea10 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -285,7 +285,7 @@ def test_memory_usage(self, index): if index.inferred_type == "object": assert result3 > result2 - def test_argsort(self, request, index): + def test_argsort(self, index): # separately tested if isinstance(index, CategoricalIndex): return diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 43b893b084672..ac76953c66a24 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -8,8 +8,14 @@ import numpy as np import pytest -from pandas.compat import IS64 -from pandas.errors import InvalidIndexError +from pandas.compat import ( + IS64, + pa_version_under7p0, +) +from pandas.errors import ( + InvalidIndexError, + PerformanceWarning, +) from pandas.util._test_decorators import async_mark import pandas as pd @@ -62,6 +68,22 @@ def test_new_axis(self, index): assert new_index.ndim == 2 assert isinstance(new_index, np.ndarray) + def test_argsort(self, index): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + super().test_argsort(index) + + def test_numpy_argsort(self, index): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + super().test_numpy_argsort(index) + def test_constructor_regular(self, index): tm.assert_contains_all(index, index) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 40a107658231d..c81b3a533170e 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -12,6 +12,7 @@ IS64, pa_version_under7p0, ) +from pandas.errors import PerformanceWarning from pandas.core.dtypes.common import is_integer_dtype @@ -169,7 +170,12 @@ def test_copy_name(self, index_flat): s1 = pd.Series(2, index=first) s2 = pd.Series(3, index=second[:-1]) # See GH#13365 - s3 = s1 * s2 + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + s3 = s1 * s2 assert s3.index.name == "mario" def test_copy_name2(self, index_flat): @@ -460,9 +466,14 @@ def test_hasnans_isnans(self, index_flat): @pytest.mark.parametrize("na_position", [None, "middle"]) def test_sort_values_invalid_na_position(index_with_missing, na_position): - - with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): - index_with_missing.sort_values(na_position=na_position) + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(index_with_missing.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): + index_with_missing.sort_values(na_position=na_position) @pytest.mark.parametrize("na_position", ["first", "last"]) @@ -488,7 +499,13 @@ def test_sort_values_with_missing(index_with_missing, na_position, request): # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype) - result = index_with_missing.sort_values(na_position=na_position) + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 + and getattr(index_with_missing.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + result = index_with_missing.sort_values(na_position=na_position) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index d2f6a1736ff7c..f869fa12c5438 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -9,6 +9,7 @@ import pytest from pandas.compat import pa_version_under7p0 +from pandas.errors import PerformanceWarning from pandas.core.dtypes.cast import find_common_type @@ -38,8 +39,18 @@ def test_union_same_types(index): # Union with a non-unique, non-monotonic index raises error # Only needed for bool index factory - idx1 = index.sort_values() - idx2 = index.sort_values() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + idx1 = index.sort_values() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + idx2 = index.sort_values() assert idx1.union(idx2).dtype == idx1.dtype @@ -98,8 +109,18 @@ def test_union_different_types(index_flat, index_flat2, request): # Union with a non-unique, non-monotonic index raises error # This applies to the boolean index - idx1 = idx1.sort_values() - idx2 = idx2.sort_values() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(idx1.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + idx1 = idx1.sort_values() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(idx2.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + idx2 = idx2.sort_values() with tm.assert_produces_warning(warn, match="'<' not supported between"): res1 = idx1.union(idx2) @@ -231,6 +252,9 @@ def test_intersection_base(self, index): with pytest.raises(TypeError, match=msg): first.intersection([1, 2, 3]) + @pytest.mark.filterwarnings( + "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning" + ) def test_union_base(self, index): first = index[3:] second = index[:5] @@ -255,6 +279,9 @@ def test_union_base(self, index): with pytest.raises(TypeError, match=msg): first.union([1, 2, 3]) + @pytest.mark.filterwarnings( + "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning" + ) def test_difference_base(self, sort, index): first = index[2:] second = index[:4] @@ -280,6 +307,9 @@ def test_difference_base(self, sort, index): with pytest.raises(TypeError, match=msg): first.difference([1, 2, 3], sort) + @pytest.mark.filterwarnings( + "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning" + ) def test_symmetric_difference(self, index): if isinstance(index, CategoricalIndex): return @@ -371,8 +401,18 @@ def test_union_unequal(self, index_flat, fname, sname, expected_name): # test copy.union(subset) - need sort for unicode and string first = index.copy().set_names(fname) second = index[1:].set_names(sname) - union = first.union(second).sort_values() - expected = index.set_names(expected_name).sort_values() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + union = first.union(second).sort_values() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + expected = index.set_names(expected_name).sort_values() tm.assert_index_equal(union, expected) @pytest.mark.parametrize( @@ -438,8 +478,18 @@ def test_intersect_unequal(self, index_flat, fname, sname, expected_name): # test copy.intersection(subset) - need sort for unicode and string first = index.copy().set_names(fname) second = index[1:].set_names(sname) - intersect = first.intersection(second).sort_values() - expected = index[1:].set_names(expected_name).sort_values() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + intersect = first.intersection(second).sort_values() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + expected = index[1:].set_names(expected_name).sort_values() tm.assert_index_equal(intersect, expected) def test_intersection_name_retention_with_nameless(self, index): @@ -495,6 +545,9 @@ def test_intersection_difference_match_empty(self, index, sort): tm.assert_index_equal(inter, diff, exact=True) +@pytest.mark.filterwarnings( + "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning" +) @pytest.mark.parametrize( "method", ["intersection", "union", "difference", "symmetric_difference"] )