Skip to content

Backport PR #53548: CI/DEPS: Add xfail(strict=False) to related unstable sorting changes in Numpy 1.25 #53566

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion pandas/tests/frame/methods/test_nlargest.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

import pandas as pd
import pandas._testing as tm
from pandas.util.version import Version


@pytest.fixture
Expand Down Expand Up @@ -155,7 +156,7 @@ def test_nlargest_n_identical_values(self):
[["a", "b", "c"], ["c", "b", "a"], ["a"], ["b"], ["a", "b"], ["c", "b"]],
)
@pytest.mark.parametrize("n", range(1, 6))
def test_nlargest_n_duplicate_index(self, df_duplicates, n, order):
def test_nlargest_n_duplicate_index(self, df_duplicates, n, order, request):
# GH#13412

df = df_duplicates
Expand All @@ -165,6 +166,18 @@ def test_nlargest_n_duplicate_index(self, df_duplicates, n, order):

result = df.nlargest(n, order)
expected = df.sort_values(order, ascending=False).head(n)
if Version(np.__version__) >= Version("1.25") and (
(order == ["a"] and n in (1, 2, 3, 4)) or (order == ["a", "b"]) and n == 5
):
request.node.add_marker(
pytest.mark.xfail(
reason=(
"pandas default unstable sorting of duplicates"
"issue with numpy>=1.25 with AVX instructions"
),
strict=False,
)
)
tm.assert_frame_equal(result, expected)

def test_nlargest_duplicate_keep_all_ties(self):
Expand Down
29 changes: 27 additions & 2 deletions pandas/tests/frame/methods/test_sort_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
date_range,
)
import pandas._testing as tm
from pandas.util.version import Version


class TestDataFrameSortValues:
Expand Down Expand Up @@ -849,9 +850,22 @@ def ascending(request):

class TestSortValuesLevelAsStr:
def test_sort_index_level_and_column_label(
self, df_none, df_idx, sort_names, ascending
self, df_none, df_idx, sort_names, ascending, request
):
# GH#14353
if (
Version(np.__version__) >= Version("1.25")
and request.node.callspec.id == "df_idx0-inner-True"
):
request.node.add_marker(
pytest.mark.xfail(
reason=(
"pandas default unstable sorting of duplicates"
"issue with numpy>=1.25 with AVX instructions"
),
strict=False,
)
)

# Get index levels from df_idx
levels = df_idx.index.names
Expand All @@ -867,7 +881,7 @@ def test_sort_index_level_and_column_label(
tm.assert_frame_equal(result, expected)

def test_sort_column_level_and_index_label(
self, df_none, df_idx, sort_names, ascending
self, df_none, df_idx, sort_names, ascending, request
):
# GH#14353

Expand All @@ -886,6 +900,17 @@ def test_sort_column_level_and_index_label(
# Compute result by transposing and sorting on axis=1.
result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1)

if Version(np.__version__) >= Version("1.25"):
request.node.add_marker(
pytest.mark.xfail(
reason=(
"pandas default unstable sorting of duplicates"
"issue with numpy>=1.25 with AVX instructions"
),
strict=False,
)
)

tm.assert_frame_equal(result, expected)

def test_sort_values_validate_ascending_for_value_error(self):
Expand Down
89 changes: 82 additions & 7 deletions pandas/tests/groupby/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
to_datetime,
)
import pandas._testing as tm
from pandas.util.version import Version


def tests_value_counts_index_names_category_column():
Expand Down Expand Up @@ -244,8 +245,18 @@ def test_bad_subset(education_df):
gp.value_counts(subset=["country"])


def test_basic(education_df):
def test_basic(education_df, request):
# gh43564
if Version(np.__version__) >= Version("1.25"):
request.node.add_marker(
pytest.mark.xfail(
reason=(
"pandas default unstable sorting of duplicates"
"issue with numpy>=1.25 with AVX instructions"
),
strict=False,
)
)
result = education_df.groupby("country")[["gender", "education"]].value_counts(
normalize=True
)
Expand Down Expand Up @@ -283,7 +294,7 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize("frame", [True, False])
def test_against_frame_and_seriesgroupby(
education_df, groupby, normalize, name, sort, ascending, as_index, frame
education_df, groupby, normalize, name, sort, ascending, as_index, frame, request
):
# test all parameters:
# - Use column, array or function as by= parameter
Expand All @@ -293,6 +304,16 @@ def test_against_frame_and_seriesgroupby(
# - 3-way compare against:
# - apply with :meth:`~DataFrame.value_counts`
# - `~SeriesGroupBy.value_counts`
if Version(np.__version__) >= Version("1.25") and frame and sort and normalize:
request.node.add_marker(
pytest.mark.xfail(
reason=(
"pandas default unstable sorting of duplicates"
"issue with numpy>=1.25 with AVX instructions"
),
strict=False,
)
)
by = {
"column": "country",
"array": education_df["country"].values,
Expand Down Expand Up @@ -454,8 +475,18 @@ def nulls_df():
],
)
def test_dropna_combinations(
nulls_df, group_dropna, count_dropna, expected_rows, expected_values
nulls_df, group_dropna, count_dropna, expected_rows, expected_values, request
):
if Version(np.__version__) >= Version("1.25") and not group_dropna:
request.node.add_marker(
pytest.mark.xfail(
reason=(
"pandas default unstable sorting of duplicates"
"issue with numpy>=1.25 with AVX instructions"
),
strict=False,
)
)
gp = nulls_df.groupby(["A", "B"], dropna=group_dropna)
result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna)
columns = DataFrame()
Expand Down Expand Up @@ -546,10 +577,20 @@ def test_data_frame_value_counts_dropna(
],
)
def test_categorical_single_grouper_with_only_observed_categories(
education_df, as_index, observed, normalize, name, expected_data
education_df, as_index, observed, normalize, name, expected_data, request
):
# Test single categorical grouper with only observed grouping categories
# when non-groupers are also categorical
if Version(np.__version__) >= Version("1.25"):
request.node.add_marker(
pytest.mark.xfail(
reason=(
"pandas default unstable sorting of duplicates"
"issue with numpy>=1.25 with AVX instructions"
),
strict=False,
)
)

gp = education_df.astype("category").groupby(
"country", as_index=as_index, observed=observed
Expand Down Expand Up @@ -645,10 +686,21 @@ def assert_categorical_single_grouper(
],
)
def test_categorical_single_grouper_observed_true(
education_df, as_index, normalize, name, expected_data
education_df, as_index, normalize, name, expected_data, request
):
# GH#46357

if Version(np.__version__) >= Version("1.25"):
request.node.add_marker(
pytest.mark.xfail(
reason=(
"pandas default unstable sorting of duplicates"
"issue with numpy>=1.25 with AVX instructions"
),
strict=False,
)
)

expected_index = [
("FR", "male", "low"),
("FR", "female", "high"),
Expand Down Expand Up @@ -715,10 +767,21 @@ def test_categorical_single_grouper_observed_true(
],
)
def test_categorical_single_grouper_observed_false(
education_df, as_index, normalize, name, expected_data
education_df, as_index, normalize, name, expected_data, request
):
# GH#46357

if Version(np.__version__) >= Version("1.25"):
request.node.add_marker(
pytest.mark.xfail(
reason=(
"pandas default unstable sorting of duplicates"
"issue with numpy>=1.25 with AVX instructions"
),
strict=False,
)
)

expected_index = [
("FR", "male", "low"),
("FR", "female", "high"),
Expand Down Expand Up @@ -856,10 +919,22 @@ def test_categorical_multiple_groupers(
],
)
def test_categorical_non_groupers(
education_df, as_index, observed, normalize, name, expected_data
education_df, as_index, observed, normalize, name, expected_data, request
):
# GH#46357 Test non-observed categories are included in the result,
# regardless of `observed`

if Version(np.__version__) >= Version("1.25"):
request.node.add_marker(
pytest.mark.xfail(
reason=(
"pandas default unstable sorting of duplicates"
"issue with numpy>=1.25 with AVX instructions"
),
strict=False,
)
)

education_df = education_df.copy()
education_df["gender"] = education_df["gender"].astype("category")
education_df["education"] = education_df["education"].astype("category")
Expand Down