From a66d7dabf5b346b0683902679cd58b207335a5cb Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 21 Sep 2022 16:06:36 -0700 Subject: [PATCH] TST: Catch more pyarrow PerformanceWarnings --- pandas/tests/arrays/string_/test_string.py | 8 +++--- pandas/tests/base/test_unique.py | 20 +++++++++----- pandas/tests/extension/test_arrow.py | 10 +++++-- pandas/tests/extension/test_string.py | 21 +++++++++++++++ pandas/tests/indexes/test_common.py | 31 +++++++++++++++------- pandas/tests/indexes/test_setops.py | 16 ++++++----- 6 files changed, 77 insertions(+), 29 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 0a48d8e2a4983..a7b8162eb981a 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -565,28 +565,28 @@ def test_isin(dtype, fixed_now_ts): s = pd.Series(["a", "b", None], dtype=dtype) with tm.maybe_produces_warning( - PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0 + PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0 ): result = s.isin(["a", "c"]) expected = pd.Series([True, False, False]) tm.assert_series_equal(result, expected) with tm.maybe_produces_warning( - PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0 + PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0 ): result = s.isin(["a", pd.NA]) expected = pd.Series([True, False, True]) tm.assert_series_equal(result, expected) with tm.maybe_produces_warning( - PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0 + PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0 ): result = s.isin([]) expected = pd.Series([False, False, False]) tm.assert_series_equal(result, expected) with tm.maybe_produces_warning( - PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0 + PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0 ): result = s.isin(["a", fixed_now_ts]) expected = pd.Series([True, False, False]) diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index eac1e35699585..46b11ac533c7b 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -17,7 +17,8 @@ def test_unique(index_or_series_obj): obj = np.repeat(obj, range(1, len(obj) + 1)) with tm.maybe_produces_warning( PerformanceWarning, - pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]", + pa_version_under2p0 + and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow", ): result = obj.unique() @@ -59,7 +60,8 @@ def test_unique_null(null_obj, index_or_series_obj): obj = klass(repeated_values, dtype=obj.dtype) with tm.maybe_produces_warning( PerformanceWarning, - pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]", + pa_version_under2p0 + and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow", ): result = obj.unique() @@ -88,10 +90,11 @@ def test_nunique(index_or_series_obj): obj = np.repeat(obj, range(1, len(obj) + 1)) with tm.maybe_produces_warning( PerformanceWarning, - pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]", + pa_version_under2p0 + and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow", ): expected = len(obj.unique()) - assert obj.nunique(dropna=False) == expected + assert obj.nunique(dropna=False) == expected @pytest.mark.parametrize("null_obj", [np.nan, None]) @@ -116,17 +119,20 @@ def test_nunique_null(null_obj, index_or_series_obj): else: with tm.maybe_produces_warning( PerformanceWarning, - pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]", + pa_version_under2p0 + and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow", ): num_unique_values = len(obj.unique()) with tm.maybe_produces_warning( PerformanceWarning, - pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]", + pa_version_under2p0 + and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow", ): assert obj.nunique() == max(0, num_unique_values - 1) with tm.maybe_produces_warning( PerformanceWarning, - pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]", + pa_version_under2p0 + and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow", ): assert obj.nunique(dropna=False) == max(0, num_unique_values) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 32e1b4fd3ac92..e6bf8569ef007 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -1215,7 +1215,10 @@ def test_unique(self, data, box, method, request): reason=f"unique has no pyarrow kernel for {pa_dtype}.", ) ) - super().test_unique(data, box, method) + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under2p0, check_stacklevel=False + ): + super().test_unique(data, box, method) @pytest.mark.parametrize("na_sentinel", [-1, -2]) def test_factorize(self, data_for_grouping, na_sentinel, request): @@ -1245,7 +1248,10 @@ def test_factorize_equivalence(self, data_for_grouping, na_sentinel, request): reason=f"dictionary_encode has no pyarrow kernel for {pa_dtype}", ) ) - super().test_factorize_equivalence(data_for_grouping, na_sentinel) + with tm.maybe_produces_warning( + PerformanceWarning, pa_version_under2p0, check_stacklevel=False + ): + super().test_factorize_equivalence(data_for_grouping, na_sentinel) def test_factorize_empty(self, data, request): pa_dtype = data.dtype.pyarrow_dtype diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 73a2e01770028..e2e4475cd520d 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -19,6 +19,7 @@ import pytest from pandas.compat import ( + pa_version_under2p0, pa_version_under6p0, pa_version_under7p0, ) @@ -319,6 +320,26 @@ def test_sort_values_frame(self, data_for_sorting, ascending): ): super().test_sort_values_frame(data_for_sorting, ascending) + @pytest.mark.parametrize("box", [pd.Series, lambda x: x]) + @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) + def test_unique(self, data, box, method): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under2p0 and getattr(data.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + super().test_unique(data, box, method) + + @pytest.mark.parametrize("na_sentinel", [-1, -2]) + def test_factorize_equivalence(self, data_for_grouping, na_sentinel): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under2p0 + and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): + super().test_factorize_equivalence(data_for_grouping, na_sentinel) + class TestCasting(base.BaseCastingTests): pass diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index c81b3a533170e..f4d958999b981 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -10,6 +10,7 @@ from pandas.compat import ( IS64, + pa_version_under2p0, pa_version_under7p0, ) from pandas.errors import PerformanceWarning @@ -229,7 +230,12 @@ def test_unique(self, index_flat): except NotImplementedError: pass - result = idx.unique() + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under2p0 + and getattr(index_flat.dtype, "storage", "") == "pyarrow", + ): + result = idx.unique() tm.assert_index_equal(result, idx_unique) # nans: @@ -248,8 +254,14 @@ def test_unique(self, index_flat): assert idx_unique_nan.dtype == index.dtype expected = idx_unique_nan - for i in [idx_nan, idx_unique_nan]: - result = i.unique() + for pos, i in enumerate([idx_nan, idx_unique_nan]): + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under2p0 + and getattr(index_flat.dtype, "storage", "") == "pyarrow" + and pos == 0, + ): + result = i.unique() tm.assert_index_equal(result, expected) def test_searchsorted_monotonic(self, index_flat, request): @@ -466,13 +478,12 @@ def test_hasnans_isnans(self, index_flat): @pytest.mark.parametrize("na_position", [None, "middle"]) def test_sort_values_invalid_na_position(index_with_missing, na_position): - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 - and getattr(index_with_missing.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): + with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): + with tm.maybe_produces_warning( + PerformanceWarning, + getattr(index_with_missing.dtype, "storage", "") == "pyarrow", + check_stacklevel=False, + ): index_with_missing.sort_values(na_position=na_position) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index f869fa12c5438..941f92111f155 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -8,7 +8,10 @@ import numpy as np import pytest -from pandas.compat import pa_version_under7p0 +from pandas.compat import ( + pa_version_under2p0, + pa_version_under7p0, +) from pandas.errors import PerformanceWarning from pandas.core.dtypes.cast import find_common_type @@ -573,14 +576,15 @@ def test_intersection_duplicates_all_indexes(index): # No duplicates in empty indexes return - def check_intersection_commutative(left, right): - assert left.intersection(right).equals(right.intersection(left)) - idx = index idx_non_unique = idx[[0, 0, 1, 2]] - check_intersection_commutative(idx, idx_non_unique) - assert idx.intersection(idx_non_unique).is_unique + with tm.maybe_produces_warning( + PerformanceWarning, + pa_version_under2p0 and getattr(index.dtype, "storage", "") == "pyarrow", + ): + assert idx.intersection(idx_non_unique).equals(idx_non_unique.intersection(idx)) + assert idx.intersection(idx_non_unique).is_unique @pytest.mark.parametrize(