TST: Catch more pyarrow PerformanceWarnings #48699

Merged · 1 commit · Sep 22, 2022
pandas/tests/arrays/string_/test_string.py (8 changes: 4 additions & 4 deletions)
@@ -565,28 +565,28 @@ def test_isin(dtype, fixed_now_ts):
s = pd.Series(["a", "b", None], dtype=dtype)

with tm.maybe_produces_warning(
- PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
+ PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
):
result = s.isin(["a", "c"])
expected = pd.Series([True, False, False])
tm.assert_series_equal(result, expected)

with tm.maybe_produces_warning(
- PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
+ PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
):
result = s.isin(["a", pd.NA])
expected = pd.Series([True, False, True])
tm.assert_series_equal(result, expected)

with tm.maybe_produces_warning(
- PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
+ PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
):
result = s.isin([])
expected = pd.Series([False, False, False])
tm.assert_series_equal(result, expected)

with tm.maybe_produces_warning(
- PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
+ PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
):
result = s.isin(["a", fixed_now_ts])
expected = pd.Series([True, False, False])
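Note on the change above: the `dtype` fixture yields `StringDtype` objects rather than plain strings, so comparing the dtype itself to `"pyarrow"` never matches and the expected PerformanceWarning was not being asserted. A minimal sketch of the distinction (illustrative only, not part of the diff; assumes pandas with pyarrow installed):

import pandas as pd

# Illustrative sketch: the Arrow-backed string dtype object.
dtype = pd.StringDtype(storage="pyarrow")

# "pyarrow" is not a valid dtype alias, so this comparison is always False.
print(dtype == "pyarrow")           # False
# "string[pyarrow]" is the alias that matches this dtype.
print(dtype == "string[pyarrow]")   # True
# The .storage attribute names the backing implementation directly.
print(dtype.storage == "pyarrow")   # True  <- the check the diff switches to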
pandas/tests/base/test_unique.py (20 changes: 13 additions & 7 deletions)
@@ -17,7 +17,8 @@ def test_unique(index_or_series_obj):
obj = np.repeat(obj, range(1, len(obj) + 1))
with tm.maybe_produces_warning(
PerformanceWarning,
- pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
+ pa_version_under2p0
+ and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
):
result = obj.unique()

@@ -59,7 +60,8 @@ def test_unique_null(null_obj, index_or_series_obj):
obj = klass(repeated_values, dtype=obj.dtype)
with tm.maybe_produces_warning(
PerformanceWarning,
- pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
+ pa_version_under2p0
+ and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
):
result = obj.unique()

@@ -88,10 +90,11 @@ def test_nunique(index_or_series_obj):
obj = np.repeat(obj, range(1, len(obj) + 1))
with tm.maybe_produces_warning(
PerformanceWarning,
- pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
+ pa_version_under2p0
+ and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
):
expected = len(obj.unique())
- assert obj.nunique(dropna=False) == expected
+ assert obj.nunique(dropna=False) == expected


@pytest.mark.parametrize("null_obj", [np.nan, None])
@@ -116,17 +119,20 @@ def test_nunique_null(null_obj, index_or_series_obj):
else:
with tm.maybe_produces_warning(
PerformanceWarning,
- pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
+ pa_version_under2p0
+ and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
):
num_unique_values = len(obj.unique())
with tm.maybe_produces_warning(
PerformanceWarning,
- pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
+ pa_version_under2p0
+ and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
):
assert obj.nunique() == max(0, num_unique_values - 1)
with tm.maybe_produces_warning(
PerformanceWarning,
- pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
+ pa_version_under2p0
+ and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
):
assert obj.nunique(dropna=False) == max(0, num_unique_values)

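The switch from `str(...) == "string[pyarrow]"` to `getattr(..., "storage", "") == "pyarrow"` serves two purposes: the fixture feeds objects of many dtypes, most of which have no `storage` attribute, and `str()` of a pyarrow-backed `StringDtype` is simply its name, `"string"`, so the old comparison appears never to have matched. A rough sketch of the new check (illustrative only, not part of the diff):

import pandas as pd

# Illustrative sketch: only pandas' StringDtype carries a .storage attribute,
# so getattr(..., "storage", "") falls back to "" instead of raising
# AttributeError for numpy and other extension dtypes.
objs = [
    pd.Series([1, 2, 3]),                           # numpy int64, no .storage
    pd.Series(["a", "b"], dtype="string[python]"),  # StringDtype, storage="python"
]
for obj in objs:
    uses_pyarrow_string = getattr(obj.dtype, "storage", "") == "pyarrow"
    print(str(obj.dtype), uses_pyarrow_string)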
pandas/tests/extension/test_arrow.py (10 changes: 8 additions & 2 deletions)
@@ -1215,7 +1215,10 @@ def test_unique(self, data, box, method, request):
reason=f"unique has no pyarrow kernel for {pa_dtype}.",
)
)
- super().test_unique(data, box, method)
+ with tm.maybe_produces_warning(
+ PerformanceWarning, pa_version_under2p0, check_stacklevel=False
+ ):
+ super().test_unique(data, box, method)

@pytest.mark.parametrize("na_sentinel", [-1, -2])
def test_factorize(self, data_for_grouping, na_sentinel, request):
@@ -1245,7 +1248,10 @@ def test_factorize_equivalence(self, data_for_grouping, na_sentinel, request):
reason=f"dictionary_encode has no pyarrow kernel for {pa_dtype}",
)
)
- super().test_factorize_equivalence(data_for_grouping, na_sentinel)
+ with tm.maybe_produces_warning(
+ PerformanceWarning, pa_version_under2p0, check_stacklevel=False
+ ):
+ super().test_factorize_equivalence(data_for_grouping, na_sentinel)

def test_factorize_empty(self, data, request):
pa_dtype = data.dtype.pyarrow_dtype
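The overrides above wrap the inherited extension tests so that, when running against pyarrow older than 2.0, the fallback PerformanceWarning is asserted instead of escaping. `tm.maybe_produces_warning` is the helper used throughout this PR; roughly speaking (an assumed equivalent for illustration, not the actual pandas source), it asserts the warning only when the condition holds and otherwise enters a do-nothing context:

from contextlib import nullcontext

import pandas._testing as tm

def maybe_produces_warning_sketch(warning, condition, **kwargs):
    # Assumed behavior, for illustration only: when `condition` is true,
    # require that `warning` is emitted inside the block (delegating to
    # tm.assert_produces_warning, which accepts e.g. check_stacklevel=False);
    # otherwise return a no-op context so no warning is required.
    if condition:
        return tm.assert_produces_warning(warning, **kwargs)
    return nullcontext()

Running the touched test modules with pytest's `-W "error::pandas.errors.PerformanceWarning"` option should be one way to confirm that no warning escapes the new contexts.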
pandas/tests/extension/test_string.py (21 changes: 21 additions & 0 deletions)
@@ -19,6 +19,7 @@
import pytest

from pandas.compat import (
+ pa_version_under2p0,
pa_version_under6p0,
pa_version_under7p0,
)
@@ -319,6 +320,26 @@ def test_sort_values_frame(self, data_for_sorting, ascending):
):
super().test_sort_values_frame(data_for_sorting, ascending)

@pytest.mark.parametrize("box", [pd.Series, lambda x: x])
@pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique])
def test_unique(self, data, box, method):
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under2p0 and getattr(data.dtype, "storage", "") == "pyarrow",
check_stacklevel=False,
):
super().test_unique(data, box, method)

@pytest.mark.parametrize("na_sentinel", [-1, -2])
def test_factorize_equivalence(self, data_for_grouping, na_sentinel):
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under2p0
and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow",
check_stacklevel=False,
):
super().test_factorize_equivalence(data_for_grouping, na_sentinel)


class TestCasting(base.BaseCastingTests):
pass
pandas/tests/indexes/test_common.py (31 changes: 21 additions & 10 deletions)
@@ -10,6 +10,7 @@

from pandas.compat import (
IS64,
+ pa_version_under2p0,
pa_version_under7p0,
)
from pandas.errors import PerformanceWarning
@@ -229,7 +230,12 @@ def test_unique(self, index_flat):
except NotImplementedError:
pass

- result = idx.unique()
+ with tm.maybe_produces_warning(
+ PerformanceWarning,
+ pa_version_under2p0
+ and getattr(index_flat.dtype, "storage", "") == "pyarrow",
+ ):
+ result = idx.unique()
tm.assert_index_equal(result, idx_unique)

# nans:
@@ -248,8 +254,14 @@ def test_unique(self, index_flat):
assert idx_unique_nan.dtype == index.dtype

expected = idx_unique_nan
- for i in [idx_nan, idx_unique_nan]:
- result = i.unique()
+ for pos, i in enumerate([idx_nan, idx_unique_nan]):
+ with tm.maybe_produces_warning(
+ PerformanceWarning,
+ pa_version_under2p0
+ and getattr(index_flat.dtype, "storage", "") == "pyarrow"
+ and pos == 0,
+ ):
+ result = i.unique()
tm.assert_index_equal(result, expected)

def test_searchsorted_monotonic(self, index_flat, request):
@@ -466,13 +478,12 @@ def test_hasnans_isnans(self, index_flat):

@pytest.mark.parametrize("na_position", [None, "middle"])
def test_sort_values_invalid_na_position(index_with_missing, na_position):
- with tm.maybe_produces_warning(
- PerformanceWarning,
- pa_version_under7p0
- and getattr(index_with_missing.dtype, "storage", "") == "pyarrow",
- check_stacklevel=False,
- ):
- with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
+ with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
+ with tm.maybe_produces_warning(
+ PerformanceWarning,
+ getattr(index_with_missing.dtype, "storage", "") == "pyarrow",
+ check_stacklevel=False,
+ ):
index_with_missing.sort_values(na_position=na_position)


pandas/tests/indexes/test_setops.py (16 changes: 10 additions & 6 deletions)
@@ -8,7 +8,10 @@
import numpy as np
import pytest

- from pandas.compat import pa_version_under7p0
+ from pandas.compat import (
+ pa_version_under2p0,
+ pa_version_under7p0,
+ )
from pandas.errors import PerformanceWarning

from pandas.core.dtypes.cast import find_common_type
@@ -573,14 +576,15 @@ def test_intersection_duplicates_all_indexes(index):
# No duplicates in empty indexes
return

- def check_intersection_commutative(left, right):
- assert left.intersection(right).equals(right.intersection(left))

idx = index
idx_non_unique = idx[[0, 0, 1, 2]]

- check_intersection_commutative(idx, idx_non_unique)
- assert idx.intersection(idx_non_unique).is_unique
+ with tm.maybe_produces_warning(
+ PerformanceWarning,
+ pa_version_under2p0 and getattr(index.dtype, "storage", "") == "pyarrow",
+ ):
+ assert idx.intersection(idx_non_unique).equals(idx_non_unique.intersection(idx))
+ assert idx.intersection(idx_non_unique).is_unique


@pytest.mark.parametrize(