Skip to content

Commit 035ea3b

Browse files
mroeschke authored and noatamir committed
TST: Catch more pyarrow PerformanceWarnings (pandas-dev#48699)
1 parent 4d3b25b commit 035ea3b

File tree

6 files changed

+77 -29 lines changed

pandas/tests/arrays/string_/test_string.py

+4 -4
Original file line number | Diff line number | Diff line change
@@ -565,28 +565,28 @@ def test_isin(dtype, fixed_now_ts):
565565
s = pd.Series(["a", "b", None], dtype=dtype)
566566

567567
with tm.maybe_produces_warning(
568-
PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
568+
PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
569569
):
570570
result = s.isin(["a", "c"])
571571
expected = pd.Series([True, False, False])
572572
tm.assert_series_equal(result, expected)
573573

574574
with tm.maybe_produces_warning(
575-
PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
575+
PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
576576
):
577577
result = s.isin(["a", pd.NA])
578578
expected = pd.Series([True, False, True])
579579
tm.assert_series_equal(result, expected)
580580

581581
with tm.maybe_produces_warning(
582-
PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
582+
PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
583583
):
584584
result = s.isin([])
585585
expected = pd.Series([False, False, False])
586586
tm.assert_series_equal(result, expected)
587587

588588
with tm.maybe_produces_warning(
589-
PerformanceWarning, dtype == "pyarrow" and pa_version_under2p0
589+
PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0
590590
):
591591
result = s.isin(["a", fixed_now_ts])
592592
expected = pd.Series([True, False, False])

pandas/tests/base/test_unique.py

+13 -7
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,8 @@ def test_unique(index_or_series_obj):
1717
obj = np.repeat(obj, range(1, len(obj) + 1))
1818
with tm.maybe_produces_warning(
1919
PerformanceWarning,
20-
pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
20+
pa_version_under2p0
21+
and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
2122
):
2223
result = obj.unique()
2324

@@ -59,7 +60,8 @@ def test_unique_null(null_obj, index_or_series_obj):
5960
obj = klass(repeated_values, dtype=obj.dtype)
6061
with tm.maybe_produces_warning(
6162
PerformanceWarning,
62-
pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
63+
pa_version_under2p0
64+
and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
6365
):
6466
result = obj.unique()
6567

@@ -88,10 +90,11 @@ def test_nunique(index_or_series_obj):
8890
obj = np.repeat(obj, range(1, len(obj) + 1))
8991
with tm.maybe_produces_warning(
9092
PerformanceWarning,
91-
pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
93+
pa_version_under2p0
94+
and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
9295
):
9396
expected = len(obj.unique())
94-
assert obj.nunique(dropna=False) == expected
97+
assert obj.nunique(dropna=False) == expected
9598

9699

97100
@pytest.mark.parametrize("null_obj", [np.nan, None])
@@ -116,17 +119,20 @@ def test_nunique_null(null_obj, index_or_series_obj):
116119
else:
117120
with tm.maybe_produces_warning(
118121
PerformanceWarning,
119-
pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
122+
pa_version_under2p0
123+
and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
120124
):
121125
num_unique_values = len(obj.unique())
122126
with tm.maybe_produces_warning(
123127
PerformanceWarning,
124-
pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
128+
pa_version_under2p0
129+
and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
125130
):
126131
assert obj.nunique() == max(0, num_unique_values - 1)
127132
with tm.maybe_produces_warning(
128133
PerformanceWarning,
129-
pa_version_under2p0 and str(index_or_series_obj.dtype) == "string[pyarrow]",
134+
pa_version_under2p0
135+
and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow",
130136
):
131137
assert obj.nunique(dropna=False) == max(0, num_unique_values)
132138

pandas/tests/extension/test_arrow.py

+8 -2
Original file line number | Diff line number | Diff line change
@@ -1215,7 +1215,10 @@ def test_unique(self, data, box, method, request):
12151215
reason=f"unique has no pyarrow kernel for {pa_dtype}.",
12161216
)
12171217
)
1218-
super().test_unique(data, box, method)
1218+
with tm.maybe_produces_warning(
1219+
PerformanceWarning, pa_version_under2p0, check_stacklevel=False
1220+
):
1221+
super().test_unique(data, box, method)
12191222

12201223
@pytest.mark.parametrize("na_sentinel", [-1, -2])
12211224
def test_factorize(self, data_for_grouping, na_sentinel, request):
@@ -1245,7 +1248,10 @@ def test_factorize_equivalence(self, data_for_grouping, na_sentinel, request):
12451248
reason=f"dictionary_encode has no pyarrow kernel for {pa_dtype}",
12461249
)
12471250
)
1248-
super().test_factorize_equivalence(data_for_grouping, na_sentinel)
1251+
with tm.maybe_produces_warning(
1252+
PerformanceWarning, pa_version_under2p0, check_stacklevel=False
1253+
):
1254+
super().test_factorize_equivalence(data_for_grouping, na_sentinel)
12491255

12501256
def test_factorize_empty(self, data, request):
12511257
pa_dtype = data.dtype.pyarrow_dtype

pandas/tests/extension/test_string.py

+21 -0
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
import pytest
2020

2121
from pandas.compat import (
22+
pa_version_under2p0,
2223
pa_version_under6p0,
2324
pa_version_under7p0,
2425
)
@@ -319,6 +320,26 @@ def test_sort_values_frame(self, data_for_sorting, ascending):
319320
):
320321
super().test_sort_values_frame(data_for_sorting, ascending)
321322

323+
@pytest.mark.parametrize("box", [pd.Series, lambda x: x])
324+
@pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique])
325+
def test_unique(self, data, box, method):
326+
with tm.maybe_produces_warning(
327+
PerformanceWarning,
328+
pa_version_under2p0 and getattr(data.dtype, "storage", "") == "pyarrow",
329+
check_stacklevel=False,
330+
):
331+
super().test_unique(data, box, method)
332+
333+
@pytest.mark.parametrize("na_sentinel", [-1, -2])
334+
def test_factorize_equivalence(self, data_for_grouping, na_sentinel):
335+
with tm.maybe_produces_warning(
336+
PerformanceWarning,
337+
pa_version_under2p0
338+
and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow",
339+
check_stacklevel=False,
340+
):
341+
super().test_factorize_equivalence(data_for_grouping, na_sentinel)
342+
322343

323344
class TestCasting(base.BaseCastingTests):
324345
pass

pandas/tests/indexes/test_common.py

+21 -10
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010

1111
from pandas.compat import (
1212
IS64,
13+
pa_version_under2p0,
1314
pa_version_under7p0,
1415
)
1516
from pandas.errors import PerformanceWarning
@@ -229,7 +230,12 @@ def test_unique(self, index_flat):
229230
except NotImplementedError:
230231
pass
231232

232-
result = idx.unique()
233+
with tm.maybe_produces_warning(
234+
PerformanceWarning,
235+
pa_version_under2p0
236+
and getattr(index_flat.dtype, "storage", "") == "pyarrow",
237+
):
238+
result = idx.unique()
233239
tm.assert_index_equal(result, idx_unique)
234240

235241
# nans:
@@ -248,8 +254,14 @@ def test_unique(self, index_flat):
248254
assert idx_unique_nan.dtype == index.dtype
249255

250256
expected = idx_unique_nan
251-
for i in [idx_nan, idx_unique_nan]:
252-
result = i.unique()
257+
for pos, i in enumerate([idx_nan, idx_unique_nan]):
258+
with tm.maybe_produces_warning(
259+
PerformanceWarning,
260+
pa_version_under2p0
261+
and getattr(index_flat.dtype, "storage", "") == "pyarrow"
262+
and pos == 0,
263+
):
264+
result = i.unique()
253265
tm.assert_index_equal(result, expected)
254266

255267
def test_searchsorted_monotonic(self, index_flat, request):
@@ -466,13 +478,12 @@ def test_hasnans_isnans(self, index_flat):
466478

467479
@pytest.mark.parametrize("na_position", [None, "middle"])
468480
def test_sort_values_invalid_na_position(index_with_missing, na_position):
469-
with tm.maybe_produces_warning(
470-
PerformanceWarning,
471-
pa_version_under7p0
472-
and getattr(index_with_missing.dtype, "storage", "") == "pyarrow",
473-
check_stacklevel=False,
474-
):
475-
with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
481+
with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
482+
with tm.maybe_produces_warning(
483+
PerformanceWarning,
484+
getattr(index_with_missing.dtype, "storage", "") == "pyarrow",
485+
check_stacklevel=False,
486+
):
476487
index_with_missing.sort_values(na_position=na_position)
477488

478489

pandas/tests/indexes/test_setops.py

+10 -6
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,10 @@
88
import numpy as np
99
import pytest
1010

11-
from pandas.compat import pa_version_under7p0
11+
from pandas.compat import (
12+
pa_version_under2p0,
13+
pa_version_under7p0,
14+
)
1215
from pandas.errors import PerformanceWarning
1316

1417
from pandas.core.dtypes.cast import find_common_type
@@ -573,14 +576,15 @@ def test_intersection_duplicates_all_indexes(index):
573576
# No duplicates in empty indexes
574577
return
575578

576-
def check_intersection_commutative(left, right):
577-
assert left.intersection(right).equals(right.intersection(left))
578-
579579
idx = index
580580
idx_non_unique = idx[[0, 0, 1, 2]]
581581

582-
check_intersection_commutative(idx, idx_non_unique)
583-
assert idx.intersection(idx_non_unique).is_unique
582+
with tm.maybe_produces_warning(
583+
PerformanceWarning,
584+
pa_version_under2p0 and getattr(index.dtype, "storage", "") == "pyarrow",
585+
):
586+
assert idx.intersection(idx_non_unique).equals(idx_non_unique.intersection(idx))
587+
assert idx.intersection(idx_non_unique).is_unique
584588

585589

586590
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)