Skip to content

Commit 1b2cc26

Browse files
authored
TST: Filter/catch pyarrow PerformanceWarnings (#48208)
1 parent 64e7859 commit 1b2cc26

File tree

5 files changed: +169 additions, -22 deletions

pandas/tests/extension/test_arrow.py

+61 -6
Original file line number | Diff line number | Diff line change
@@ -25,9 +25,11 @@
2525
pa_version_under3p0,
2626
pa_version_under4p0,
2727
pa_version_under6p0,
28+
pa_version_under7p0,
2829
pa_version_under8p0,
2930
pa_version_under9p0,
3031
)
32+
from pandas.errors import PerformanceWarning
3133

3234
import pandas as pd
3335
import pandas._testing as tm
@@ -446,7 +448,10 @@ def test_groupby_extension_transform(self, data_for_grouping, request):
446448
reason=f"pyarrow doesn't support factorizing {pa_dtype}",
447449
)
448450
)
449-
super().test_groupby_extension_transform(data_for_grouping)
451+
with tm.maybe_produces_warning(
452+
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
453+
):
454+
super().test_groupby_extension_transform(data_for_grouping)
450455

451456
def test_groupby_extension_apply(
452457
self, data_for_grouping, groupby_apply_op, request
@@ -479,7 +484,10 @@ def test_groupby_extension_apply(
479484
reason="GH 34986",
480485
)
481486
)
482-
super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
487+
with tm.maybe_produces_warning(
488+
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
489+
):
490+
super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
483491

484492
def test_in_numeric_groupby(self, data_for_grouping, request):
485493
pa_dtype = data_for_grouping.dtype.pyarrow_dtype
@@ -518,7 +526,10 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping, request):
518526
reason="GH 34986",
519527
)
520528
)
521-
super().test_groupby_extension_agg(as_index, data_for_grouping)
529+
with tm.maybe_produces_warning(
530+
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
531+
):
532+
super().test_groupby_extension_agg(as_index, data_for_grouping)
522533

523534

524535
class TestBaseDtype(base.BaseDtypeTests):
@@ -607,6 +618,10 @@ def test_view(self, data):
607618

608619

609620
class TestBaseMissing(base.BaseMissingTests):
621+
@pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
622+
def test_dropna_array(self, data_missing):
623+
super().test_dropna_array(data_missing)
624+
610625
def test_fillna_limit_pad(self, data_missing, using_array_manager, request):
611626
if using_array_manager and pa.types.is_duration(
612627
data_missing.dtype.pyarrow_dtype
@@ -1331,6 +1346,12 @@ def test_invert(self, data, request):
13311346

13321347

13331348
class TestBaseMethods(base.BaseMethodsTests):
1349+
def test_argsort_missing_array(self, data_missing_for_sorting):
1350+
with tm.maybe_produces_warning(
1351+
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
1352+
):
1353+
super().test_argsort_missing_array(data_missing_for_sorting)
1354+
13341355
@pytest.mark.parametrize("periods", [1, -2])
13351356
def test_diff(self, data, periods, request):
13361357
pa_dtype = data.dtype.pyarrow_dtype
@@ -1345,6 +1366,7 @@ def test_diff(self, data, periods, request):
13451366
)
13461367
super().test_diff(data, periods)
13471368

1369+
@pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
13481370
@pytest.mark.parametrize("dropna", [True, False])
13491371
def test_value_counts(self, all_data, dropna, request):
13501372
pa_dtype = all_data.dtype.pyarrow_dtype
@@ -1384,7 +1406,10 @@ def test_value_counts_with_normalize(self, data, request):
13841406
reason=f"value_count has no pyarrow kernel for {pa_dtype}",
13851407
)
13861408
)
1387-
super().test_value_counts_with_normalize(data)
1409+
with tm.maybe_produces_warning(
1410+
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
1411+
):
1412+
super().test_value_counts_with_normalize(data)
13881413

13891414
@pytest.mark.xfail(
13901415
pa_version_under6p0,
@@ -1445,6 +1470,19 @@ def test_argreduce_series(
14451470
data_missing_for_sorting, op_name, skipna, expected
14461471
)
14471472

1473+
@pytest.mark.parametrize(
1474+
"na_position, expected",
1475+
[
1476+
("last", np.array([2, 0, 1], dtype=np.dtype("intp"))),
1477+
("first", np.array([1, 2, 0], dtype=np.dtype("intp"))),
1478+
],
1479+
)
1480+
def test_nargsort(self, data_missing_for_sorting, na_position, expected):
1481+
with tm.maybe_produces_warning(
1482+
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
1483+
):
1484+
super().test_nargsort(data_missing_for_sorting, na_position, expected)
1485+
14481486
@pytest.mark.parametrize("ascending", [True, False])
14491487
def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request):
14501488
pa_dtype = data_for_sorting.dtype.pyarrow_dtype
@@ -1458,7 +1496,21 @@ def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request):
14581496
),
14591497
)
14601498
)
1461-
super().test_sort_values(data_for_sorting, ascending, sort_by_key)
1499+
with tm.maybe_produces_warning(
1500+
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
1501+
):
1502+
super().test_sort_values(data_for_sorting, ascending, sort_by_key)
1503+
1504+
@pytest.mark.parametrize("ascending", [True, False])
1505+
def test_sort_values_missing(
1506+
self, data_missing_for_sorting, ascending, sort_by_key
1507+
):
1508+
with tm.maybe_produces_warning(
1509+
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
1510+
):
1511+
super().test_sort_values_missing(
1512+
data_missing_for_sorting, ascending, sort_by_key
1513+
)
14621514

14631515
@pytest.mark.parametrize("ascending", [True, False])
14641516
def test_sort_values_frame(self, data_for_sorting, ascending, request):
@@ -1473,7 +1525,10 @@ def test_sort_values_frame(self, data_for_sorting, ascending, request):
14731525
),
14741526
)
14751527
)
1476-
super().test_sort_values_frame(data_for_sorting, ascending)
1528+
with tm.maybe_produces_warning(
1529+
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
1530+
):
1531+
super().test_sort_values_frame(data_for_sorting, ascending)
14771532

14781533
@pytest.mark.parametrize("box", [pd.Series, lambda x: x])
14791534
@pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique])

pandas/tests/indexes/common.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -285,7 +285,7 @@ def test_memory_usage(self, index):
285285
if index.inferred_type == "object":
286286
assert result3 > result2
287287

288-
def test_argsort(self, request, index):
288+
def test_argsort(self, index):
289289
# separately tested
290290
if isinstance(index, CategoricalIndex):
291291
return

pandas/tests/indexes/test_base.py

+24 -2
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,14 @@
88
import numpy as np
99
import pytest
1010

11-
from pandas.compat import IS64
12-
from pandas.errors import InvalidIndexError
11+
from pandas.compat import (
12+
IS64,
13+
pa_version_under7p0,
14+
)
15+
from pandas.errors import (
16+
InvalidIndexError,
17+
PerformanceWarning,
18+
)
1319
from pandas.util._test_decorators import async_mark
1420

1521
import pandas as pd
@@ -62,6 +68,22 @@ def test_new_axis(self, index):
6268
assert new_index.ndim == 2
6369
assert isinstance(new_index, np.ndarray)
6470

71+
def test_argsort(self, index):
72+
with tm.maybe_produces_warning(
73+
PerformanceWarning,
74+
pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
75+
check_stacklevel=False,
76+
):
77+
super().test_argsort(index)
78+
79+
def test_numpy_argsort(self, index):
80+
with tm.maybe_produces_warning(
81+
PerformanceWarning,
82+
pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
83+
check_stacklevel=False,
84+
):
85+
super().test_numpy_argsort(index)
86+
6587
def test_constructor_regular(self, index):
6688
tm.assert_contains_all(index, index)
6789

pandas/tests/indexes/test_common.py

+22 -5
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
IS64,
1313
pa_version_under7p0,
1414
)
15+
from pandas.errors import PerformanceWarning
1516

1617
from pandas.core.dtypes.common import is_integer_dtype
1718

@@ -169,7 +170,12 @@ def test_copy_name(self, index_flat):
169170
s1 = pd.Series(2, index=first)
170171
s2 = pd.Series(3, index=second[:-1])
171172
# See GH#13365
172-
s3 = s1 * s2
173+
with tm.maybe_produces_warning(
174+
PerformanceWarning,
175+
pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
176+
check_stacklevel=False,
177+
):
178+
s3 = s1 * s2
173179
assert s3.index.name == "mario"
174180

175181
def test_copy_name2(self, index_flat):
@@ -460,9 +466,14 @@ def test_hasnans_isnans(self, index_flat):
460466

461467
@pytest.mark.parametrize("na_position", [None, "middle"])
462468
def test_sort_values_invalid_na_position(index_with_missing, na_position):
463-
464-
with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
465-
index_with_missing.sort_values(na_position=na_position)
469+
with tm.maybe_produces_warning(
470+
PerformanceWarning,
471+
pa_version_under7p0
472+
and getattr(index_with_missing.dtype, "storage", "") == "pyarrow",
473+
check_stacklevel=False,
474+
):
475+
with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"):
476+
index_with_missing.sort_values(na_position=na_position)
466477

467478

468479
@pytest.mark.parametrize("na_position", ["first", "last"])
@@ -488,7 +499,13 @@ def test_sort_values_with_missing(index_with_missing, na_position, request):
488499
# Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray
489500
expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype)
490501

491-
result = index_with_missing.sort_values(na_position=na_position)
502+
with tm.maybe_produces_warning(
503+
PerformanceWarning,
504+
pa_version_under7p0
505+
and getattr(index_with_missing.dtype, "storage", "") == "pyarrow",
506+
check_stacklevel=False,
507+
):
508+
result = index_with_missing.sort_values(na_position=na_position)
492509
tm.assert_index_equal(result, expected)
493510

494511

pandas/tests/indexes/test_setops.py

+61 -8
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import pytest
1010

1111
from pandas.compat import pa_version_under7p0
12+
from pandas.errors import PerformanceWarning
1213

1314
from pandas.core.dtypes.cast import find_common_type
1415

@@ -38,8 +39,18 @@
3839
def test_union_same_types(index):
3940
# Union with a non-unique, non-monotonic index raises error
4041
# Only needed for bool index factory
41-
idx1 = index.sort_values()
42-
idx2 = index.sort_values()
42+
with tm.maybe_produces_warning(
43+
PerformanceWarning,
44+
pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
45+
check_stacklevel=False,
46+
):
47+
idx1 = index.sort_values()
48+
with tm.maybe_produces_warning(
49+
PerformanceWarning,
50+
pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
51+
check_stacklevel=False,
52+
):
53+
idx2 = index.sort_values()
4354
assert idx1.union(idx2).dtype == idx1.dtype
4455

4556

@@ -98,8 +109,18 @@ def test_union_different_types(index_flat, index_flat2, request):
98109

99110
# Union with a non-unique, non-monotonic index raises error
100111
# This applies to the boolean index
101-
idx1 = idx1.sort_values()
102-
idx2 = idx2.sort_values()
112+
with tm.maybe_produces_warning(
113+
PerformanceWarning,
114+
pa_version_under7p0 and getattr(idx1.dtype, "storage", "") == "pyarrow",
115+
check_stacklevel=False,
116+
):
117+
idx1 = idx1.sort_values()
118+
with tm.maybe_produces_warning(
119+
PerformanceWarning,
120+
pa_version_under7p0 and getattr(idx2.dtype, "storage", "") == "pyarrow",
121+
check_stacklevel=False,
122+
):
123+
idx2 = idx2.sort_values()
103124

104125
with tm.assert_produces_warning(warn, match="'<' not supported between"):
105126
res1 = idx1.union(idx2)
@@ -231,6 +252,9 @@ def test_intersection_base(self, index):
231252
with pytest.raises(TypeError, match=msg):
232253
first.intersection([1, 2, 3])
233254

255+
@pytest.mark.filterwarnings(
256+
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
257+
)
234258
def test_union_base(self, index):
235259
first = index[3:]
236260
second = index[:5]
@@ -255,6 +279,9 @@ def test_union_base(self, index):
255279
with pytest.raises(TypeError, match=msg):
256280
first.union([1, 2, 3])
257281

282+
@pytest.mark.filterwarnings(
283+
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
284+
)
258285
def test_difference_base(self, sort, index):
259286
first = index[2:]
260287
second = index[:4]
@@ -280,6 +307,9 @@ def test_difference_base(self, sort, index):
280307
with pytest.raises(TypeError, match=msg):
281308
first.difference([1, 2, 3], sort)
282309

310+
@pytest.mark.filterwarnings(
311+
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
312+
)
283313
def test_symmetric_difference(self, index):
284314
if isinstance(index, CategoricalIndex):
285315
return
@@ -371,8 +401,18 @@ def test_union_unequal(self, index_flat, fname, sname, expected_name):
371401
# test copy.union(subset) - need sort for unicode and string
372402
first = index.copy().set_names(fname)
373403
second = index[1:].set_names(sname)
374-
union = first.union(second).sort_values()
375-
expected = index.set_names(expected_name).sort_values()
404+
with tm.maybe_produces_warning(
405+
PerformanceWarning,
406+
pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
407+
check_stacklevel=False,
408+
):
409+
union = first.union(second).sort_values()
410+
with tm.maybe_produces_warning(
411+
PerformanceWarning,
412+
pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
413+
check_stacklevel=False,
414+
):
415+
expected = index.set_names(expected_name).sort_values()
376416
tm.assert_index_equal(union, expected)
377417

378418
@pytest.mark.parametrize(
@@ -438,8 +478,18 @@ def test_intersect_unequal(self, index_flat, fname, sname, expected_name):
438478
# test copy.intersection(subset) - need sort for unicode and string
439479
first = index.copy().set_names(fname)
440480
second = index[1:].set_names(sname)
441-
intersect = first.intersection(second).sort_values()
442-
expected = index[1:].set_names(expected_name).sort_values()
481+
with tm.maybe_produces_warning(
482+
PerformanceWarning,
483+
pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
484+
check_stacklevel=False,
485+
):
486+
intersect = first.intersection(second).sort_values()
487+
with tm.maybe_produces_warning(
488+
PerformanceWarning,
489+
pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow",
490+
check_stacklevel=False,
491+
):
492+
expected = index[1:].set_names(expected_name).sort_values()
443493
tm.assert_index_equal(intersect, expected)
444494

445495
def test_intersection_name_retention_with_nameless(self, index):
@@ -495,6 +545,9 @@ def test_intersection_difference_match_empty(self, index, sort):
495545
tm.assert_index_equal(inter, diff, exact=True)
496546

497547

548+
@pytest.mark.filterwarnings(
549+
"ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
550+
)
498551
@pytest.mark.parametrize(
499552
"method", ["intersection", "union", "difference", "symmetric_difference"]
500553
)

0 commit comments

Comments
 (0)