Skip to content

Commit bdfaca6

Browse files
authored
CI/DEPS: Add xfail(strict=False) to related unstable sorting changes in Numpy 1.25 (#53548)
* DEBUG: npdev build
* Address tests where sorting changed
* Adjust more tests
* Undo everything, even nanargsort
* xfail the relevant tests
* Add xfail to test_sort_column_level_and_index_label
1 parent 6a1eb09 commit bdfaca6

File tree

4 files changed

+124
-11
lines changed

4 files changed

+124
-11
lines changed

pandas/core/sorting.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ def lexsort_indexer(
424424

425425
def nargsort(
426426
items: ArrayLike | Index | Series,
427-
kind: SortKind = "stable",
427+
kind: SortKind = "quicksort",
428428
ascending: bool = True,
429429
na_position: str = "last",
430430
key: Callable | None = None,

pandas/tests/frame/methods/test_nlargest.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import pandas as pd
1111
import pandas._testing as tm
12+
from pandas.util.version import Version
1213

1314

1415
@pytest.fixture
@@ -155,7 +156,7 @@ def test_nlargest_n_identical_values(self):
155156
[["a", "b", "c"], ["c", "b", "a"], ["a"], ["b"], ["a", "b"], ["c", "b"]],
156157
)
157158
@pytest.mark.parametrize("n", range(1, 6))
158-
def test_nlargest_n_duplicate_index(self, df_duplicates, n, order):
159+
def test_nlargest_n_duplicate_index(self, df_duplicates, n, order, request):
159160
# GH#13412
160161

161162
df = df_duplicates
@@ -165,6 +166,18 @@ def test_nlargest_n_duplicate_index(self, df_duplicates, n, order):
165166

166167
result = df.nlargest(n, order)
167168
expected = df.sort_values(order, ascending=False).head(n)
169+
if Version(np.__version__) >= Version("1.25") and (
170+
(order == ["a"] and n in (1, 2, 3, 4)) or (order == ["a", "b"]) and n == 5
171+
):
172+
request.node.add_marker(
173+
pytest.mark.xfail(
174+
reason=(
175+
"pandas default unstable sorting of duplicates"
176+
"issue with numpy>=1.25 with AVX instructions"
177+
),
178+
strict=False,
179+
)
180+
)
168181
tm.assert_frame_equal(result, expected)
169182

170183
def test_nlargest_duplicate_keep_all_ties(self):

pandas/tests/frame/methods/test_sort_values.py

+27-2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
date_range,
1313
)
1414
import pandas._testing as tm
15+
from pandas.util.version import Version
1516

1617

1718
class TestDataFrameSortValues:
@@ -849,9 +850,22 @@ def ascending(request):
849850

850851
class TestSortValuesLevelAsStr:
851852
def test_sort_index_level_and_column_label(
852-
self, df_none, df_idx, sort_names, ascending
853+
self, df_none, df_idx, sort_names, ascending, request
853854
):
854855
# GH#14353
856+
if (
857+
Version(np.__version__) >= Version("1.25")
858+
and request.node.callspec.id == "df_idx0-inner-True"
859+
):
860+
request.node.add_marker(
861+
pytest.mark.xfail(
862+
reason=(
863+
"pandas default unstable sorting of duplicates"
864+
"issue with numpy>=1.25 with AVX instructions"
865+
),
866+
strict=False,
867+
)
868+
)
855869

856870
# Get index levels from df_idx
857871
levels = df_idx.index.names
@@ -867,7 +881,7 @@ def test_sort_index_level_and_column_label(
867881
tm.assert_frame_equal(result, expected)
868882

869883
def test_sort_column_level_and_index_label(
870-
self, df_none, df_idx, sort_names, ascending
884+
self, df_none, df_idx, sort_names, ascending, request
871885
):
872886
# GH#14353
873887

@@ -886,6 +900,17 @@ def test_sort_column_level_and_index_label(
886900
# Compute result by transposing and sorting on axis=1.
887901
result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1)
888902

903+
if Version(np.__version__) >= Version("1.25"):
904+
request.node.add_marker(
905+
pytest.mark.xfail(
906+
reason=(
907+
"pandas default unstable sorting of duplicates"
908+
"issue with numpy>=1.25 with AVX instructions"
909+
),
910+
strict=False,
911+
)
912+
)
913+
889914
tm.assert_frame_equal(result, expected)
890915

891916
def test_sort_values_validate_ascending_for_value_error(self):

pandas/tests/groupby/test_value_counts.py

+82-7
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
to_datetime,
2222
)
2323
import pandas._testing as tm
24+
from pandas.util.version import Version
2425

2526

2627
def tests_value_counts_index_names_category_column():
@@ -246,8 +247,18 @@ def test_bad_subset(education_df):
246247
gp.value_counts(subset=["country"])
247248

248249

249-
def test_basic(education_df):
250+
def test_basic(education_df, request):
250251
# gh43564
252+
if Version(np.__version__) >= Version("1.25"):
253+
request.node.add_marker(
254+
pytest.mark.xfail(
255+
reason=(
256+
"pandas default unstable sorting of duplicates"
257+
"issue with numpy>=1.25 with AVX instructions"
258+
),
259+
strict=False,
260+
)
261+
)
251262
result = education_df.groupby("country")[["gender", "education"]].value_counts(
252263
normalize=True
253264
)
@@ -285,7 +296,7 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
285296
@pytest.mark.parametrize("as_index", [True, False])
286297
@pytest.mark.parametrize("frame", [True, False])
287298
def test_against_frame_and_seriesgroupby(
288-
education_df, groupby, normalize, name, sort, ascending, as_index, frame
299+
education_df, groupby, normalize, name, sort, ascending, as_index, frame, request
289300
):
290301
# test all parameters:
291302
# - Use column, array or function as by= parameter
@@ -295,6 +306,16 @@ def test_against_frame_and_seriesgroupby(
295306
# - 3-way compare against:
296307
# - apply with :meth:`~DataFrame.value_counts`
297308
# - `~SeriesGroupBy.value_counts`
309+
if Version(np.__version__) >= Version("1.25") and frame and sort and normalize:
310+
request.node.add_marker(
311+
pytest.mark.xfail(
312+
reason=(
313+
"pandas default unstable sorting of duplicates"
314+
"issue with numpy>=1.25 with AVX instructions"
315+
),
316+
strict=False,
317+
)
318+
)
298319
by = {
299320
"column": "country",
300321
"array": education_df["country"].values,
@@ -456,8 +477,18 @@ def nulls_df():
456477
],
457478
)
458479
def test_dropna_combinations(
459-
nulls_df, group_dropna, count_dropna, expected_rows, expected_values
480+
nulls_df, group_dropna, count_dropna, expected_rows, expected_values, request
460481
):
482+
if Version(np.__version__) >= Version("1.25") and not group_dropna:
483+
request.node.add_marker(
484+
pytest.mark.xfail(
485+
reason=(
486+
"pandas default unstable sorting of duplicates"
487+
"issue with numpy>=1.25 with AVX instructions"
488+
),
489+
strict=False,
490+
)
491+
)
461492
gp = nulls_df.groupby(["A", "B"], dropna=group_dropna)
462493
result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna)
463494
columns = DataFrame()
@@ -548,10 +579,20 @@ def test_data_frame_value_counts_dropna(
548579
],
549580
)
550581
def test_categorical_single_grouper_with_only_observed_categories(
551-
education_df, as_index, observed, normalize, name, expected_data
582+
education_df, as_index, observed, normalize, name, expected_data, request
552583
):
553584
# Test single categorical grouper with only observed grouping categories
554585
# when non-groupers are also categorical
586+
if Version(np.__version__) >= Version("1.25"):
587+
request.node.add_marker(
588+
pytest.mark.xfail(
589+
reason=(
590+
"pandas default unstable sorting of duplicates"
591+
"issue with numpy>=1.25 with AVX instructions"
592+
),
593+
strict=False,
594+
)
595+
)
555596

556597
gp = education_df.astype("category").groupby(
557598
"country", as_index=as_index, observed=observed
@@ -647,10 +688,21 @@ def assert_categorical_single_grouper(
647688
],
648689
)
649690
def test_categorical_single_grouper_observed_true(
650-
education_df, as_index, normalize, name, expected_data
691+
education_df, as_index, normalize, name, expected_data, request
651692
):
652693
# GH#46357
653694

695+
if Version(np.__version__) >= Version("1.25"):
696+
request.node.add_marker(
697+
pytest.mark.xfail(
698+
reason=(
699+
"pandas default unstable sorting of duplicates"
700+
"issue with numpy>=1.25 with AVX instructions"
701+
),
702+
strict=False,
703+
)
704+
)
705+
654706
expected_index = [
655707
("FR", "male", "low"),
656708
("FR", "female", "high"),
@@ -717,10 +769,21 @@ def test_categorical_single_grouper_observed_true(
717769
],
718770
)
719771
def test_categorical_single_grouper_observed_false(
720-
education_df, as_index, normalize, name, expected_data
772+
education_df, as_index, normalize, name, expected_data, request
721773
):
722774
# GH#46357
723775

776+
if Version(np.__version__) >= Version("1.25"):
777+
request.node.add_marker(
778+
pytest.mark.xfail(
779+
reason=(
780+
"pandas default unstable sorting of duplicates"
781+
"issue with numpy>=1.25 with AVX instructions"
782+
),
783+
strict=False,
784+
)
785+
)
786+
724787
expected_index = [
725788
("FR", "male", "low"),
726789
("FR", "female", "high"),
@@ -858,10 +921,22 @@ def test_categorical_multiple_groupers(
858921
],
859922
)
860923
def test_categorical_non_groupers(
861-
education_df, as_index, observed, normalize, name, expected_data
924+
education_df, as_index, observed, normalize, name, expected_data, request
862925
):
863926
# GH#46357 Test non-observed categories are included in the result,
864927
# regardless of `observed`
928+
929+
if Version(np.__version__) >= Version("1.25"):
930+
request.node.add_marker(
931+
pytest.mark.xfail(
932+
reason=(
933+
"pandas default unstable sorting of duplicates"
934+
"issue with numpy>=1.25 with AVX instructions"
935+
),
936+
strict=False,
937+
)
938+
)
939+
865940
education_df = education_df.copy()
866941
education_df["gender"] = education_df["gender"].astype("category")
867942
education_df["education"] = education_df["education"].astype("category")

0 commit comments

Comments
 (0)