From 8051033bd632732aa96bfa8c90b2004f15b44d40 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 8 Jun 2023 11:09:05 -0700 Subject: [PATCH] Backport PR #53548: CI/DEPS: Add xfail(strict=False) to related unstable sorting changes in Numpy 1.25 --- pandas/tests/frame/methods/test_nlargest.py | 15 +++- .../tests/frame/methods/test_sort_values.py | 29 +++++- pandas/tests/groupby/test_value_counts.py | 89 +++++++++++++++++-- 3 files changed, 123 insertions(+), 10 deletions(-) diff --git a/pandas/tests/frame/methods/test_nlargest.py b/pandas/tests/frame/methods/test_nlargest.py index b5c33a41dd780..17dea51263222 100644 --- a/pandas/tests/frame/methods/test_nlargest.py +++ b/pandas/tests/frame/methods/test_nlargest.py @@ -9,6 +9,7 @@ import pandas as pd import pandas._testing as tm +from pandas.util.version import Version @pytest.fixture @@ -155,7 +156,7 @@ def test_nlargest_n_identical_values(self): [["a", "b", "c"], ["c", "b", "a"], ["a"], ["b"], ["a", "b"], ["c", "b"]], ) @pytest.mark.parametrize("n", range(1, 6)) - def test_nlargest_n_duplicate_index(self, df_duplicates, n, order): + def test_nlargest_n_duplicate_index(self, df_duplicates, n, order, request): # GH#13412 df = df_duplicates @@ -165,6 +166,18 @@ def test_nlargest_n_duplicate_index(self, df_duplicates, n, order): result = df.nlargest(n, order) expected = df.sort_values(order, ascending=False).head(n) + if Version(np.__version__) >= Version("1.25") and ( + (order == ["a"] and n in (1, 2, 3, 4)) or (order == ["a", "b"] and n == 5) + ): + request.node.add_marker( + pytest.mark.xfail( + reason=( + "pandas default unstable sorting of duplicates " + "issue with numpy>=1.25 with AVX instructions" + ), + strict=False, + ) + ) tm.assert_frame_equal(result, expected) def test_nlargest_duplicate_keep_all_ties(self): diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index e2877acbdd040..4c41632040dbe 100644 
--- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -12,6 +12,7 @@ date_range, ) import pandas._testing as tm +from pandas.util.version import Version class TestDataFrameSortValues: @@ -849,9 +850,22 @@ def ascending(request): class TestSortValuesLevelAsStr: def test_sort_index_level_and_column_label( - self, df_none, df_idx, sort_names, ascending + self, df_none, df_idx, sort_names, ascending, request ): # GH#14353 + if ( + Version(np.__version__) >= Version("1.25") + and request.node.callspec.id == "df_idx0-inner-True" + ): + request.node.add_marker( + pytest.mark.xfail( + reason=( + "pandas default unstable sorting of duplicates " + "issue with numpy>=1.25 with AVX instructions" + ), + strict=False, + ) + ) # Get index levels from df_idx levels = df_idx.index.names @@ -867,7 +881,7 @@ def test_sort_index_level_and_column_label( tm.assert_frame_equal(result, expected) def test_sort_column_level_and_index_label( - self, df_none, df_idx, sort_names, ascending + self, df_none, df_idx, sort_names, ascending, request ): # GH#14353 @@ -886,6 +900,17 @@ def test_sort_column_level_and_index_label( # Compute result by transposing and sorting on axis=1. 
result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1) + if Version(np.__version__) >= Version("1.25"): + request.node.add_marker( + pytest.mark.xfail( + reason=( + "pandas default unstable sorting of duplicates " + "issue with numpy>=1.25 with AVX instructions" + ), + strict=False, + ) + ) + tm.assert_frame_equal(result, expected) def test_sort_values_validate_ascending_for_value_error(self): diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 2c3c2277ed627..ce29fba7a7ab0 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -21,6 +21,7 @@ to_datetime, ) import pandas._testing as tm +from pandas.util.version import Version def tests_value_counts_index_names_category_column(): @@ -244,8 +245,18 @@ def test_bad_subset(education_df): gp.value_counts(subset=["country"]) -def test_basic(education_df): +def test_basic(education_df, request): # gh43564 + if Version(np.__version__) >= Version("1.25"): + request.node.add_marker( + pytest.mark.xfail( + reason=( + "pandas default unstable sorting of duplicates " + "issue with numpy>=1.25 with AVX instructions" + ), + strict=False, + ) + ) result = education_df.groupby("country")[["gender", "education"]].value_counts( normalize=True ) @@ -283,7 +294,7 @@ def _frame_value_counts(df, keys, normalize, sort, ascending): @pytest.mark.parametrize("as_index", [True, False]) @pytest.mark.parametrize("frame", [True, False]) def test_against_frame_and_seriesgroupby( - education_df, groupby, normalize, name, sort, ascending, as_index, frame + education_df, groupby, normalize, name, sort, ascending, as_index, frame, request ): # test all parameters: # - Use column, array or function as by= parameter @@ -293,6 +304,16 @@ def test_against_frame_and_seriesgroupby( # - 3-way compare against: # - apply with :meth:`~DataFrame.value_counts` # - `~SeriesGroupBy.value_counts` + if Version(np.__version__) >= Version("1.25") and 
frame and sort and normalize: + request.node.add_marker( + pytest.mark.xfail( + reason=( + "pandas default unstable sorting of duplicates " + "issue with numpy>=1.25 with AVX instructions" + ), + strict=False, + ) + ) by = { "column": "country", "array": education_df["country"].values, @@ -454,8 +475,18 @@ def nulls_df(): ], ) def test_dropna_combinations( - nulls_df, group_dropna, count_dropna, expected_rows, expected_values + nulls_df, group_dropna, count_dropna, expected_rows, expected_values, request ): + if Version(np.__version__) >= Version("1.25") and not group_dropna: + request.node.add_marker( + pytest.mark.xfail( + reason=( + "pandas default unstable sorting of duplicates " + "issue with numpy>=1.25 with AVX instructions" + ), + strict=False, + ) + ) gp = nulls_df.groupby(["A", "B"], dropna=group_dropna) result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna) columns = DataFrame() @@ -546,10 +577,20 @@ def test_data_frame_value_counts_dropna( ], ) def test_categorical_single_grouper_with_only_observed_categories( - education_df, as_index, observed, normalize, name, expected_data + education_df, as_index, observed, normalize, name, expected_data, request ): # Test single categorical grouper with only observed grouping categories # when non-groupers are also categorical + if Version(np.__version__) >= Version("1.25"): + request.node.add_marker( + pytest.mark.xfail( + reason=( + "pandas default unstable sorting of duplicates " + "issue with numpy>=1.25 with AVX instructions" + ), + strict=False, + ) + ) gp = education_df.astype("category").groupby( "country", as_index=as_index, observed=observed @@ -645,10 +686,21 @@ def assert_categorical_single_grouper( ], ) def test_categorical_single_grouper_observed_true( - education_df, as_index, normalize, name, expected_data + education_df, as_index, normalize, name, expected_data, request ): # GH#46357 + if Version(np.__version__) >= Version("1.25"): + request.node.add_marker( + pytest.mark.xfail( + 
reason=( + "pandas default unstable sorting of duplicates " + "issue with numpy>=1.25 with AVX instructions" + ), + strict=False, + ) + ) + expected_index = [ ("FR", "male", "low"), ("FR", "female", "high"), @@ -715,10 +767,21 @@ def test_categorical_single_grouper_observed_true( ], ) def test_categorical_single_grouper_observed_false( - education_df, as_index, normalize, name, expected_data + education_df, as_index, normalize, name, expected_data, request ): # GH#46357 + if Version(np.__version__) >= Version("1.25"): + request.node.add_marker( + pytest.mark.xfail( + reason=( + "pandas default unstable sorting of duplicates " + "issue with numpy>=1.25 with AVX instructions" + ), + strict=False, + ) + ) + expected_index = [ ("FR", "male", "low"), ("FR", "female", "high"), @@ -856,10 +919,22 @@ def test_categorical_multiple_groupers( ], ) def test_categorical_non_groupers( - education_df, as_index, observed, normalize, name, expected_data + education_df, as_index, observed, normalize, name, expected_data, request ): # GH#46357 Test non-observed categories are included in the result, # regardless of `observed` + + if Version(np.__version__) >= Version("1.25"): + request.node.add_marker( + pytest.mark.xfail( + reason=( + "pandas default unstable sorting of duplicates " + "issue with numpy>=1.25 with AVX instructions" + ), + strict=False, + ) + ) + education_df = education_df.copy() education_df["gender"] = education_df["gender"].astype("category") education_df["education"] = education_df["education"].astype("category")