Skip to content

Commit 62b474b

Browse files
jbrockmendel authored and jorisvandenbossche committed
TST (string): fix xfailed groupby value_counts tests (pandas-dev#59632)
1 parent 1833ccb commit 62b474b

File tree

1 file changed

+26
-11
lines changed

1 file changed

+26
-11
lines changed

pandas/tests/groupby/methods/test_value_counts.py

+26-11
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,6 @@
88
import numpy as np
99
import pytest
1010

11-
from pandas._config import using_string_dtype
12-
13-
from pandas.compat import HAS_PYARROW
1411
import pandas.util._test_decorators as td
1512

1613
from pandas import (
@@ -288,7 +285,6 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
288285
return df[keys].value_counts(normalize=normalize, sort=sort, ascending=ascending)
289286

290287

291-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
292288
@pytest.mark.parametrize("groupby", ["column", "array", "function"])
293289
@pytest.mark.parametrize("normalize, name", [(True, "proportion"), (False, "count")])
294290
@pytest.mark.parametrize(
@@ -302,7 +298,16 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
302298
@pytest.mark.parametrize("as_index", [True, False])
303299
@pytest.mark.parametrize("frame", [True, False])
304300
def test_against_frame_and_seriesgroupby(
305-
education_df, groupby, normalize, name, sort, ascending, as_index, frame, request
301+
education_df,
302+
groupby,
303+
normalize,
304+
name,
305+
sort,
306+
ascending,
307+
as_index,
308+
frame,
309+
request,
310+
using_infer_string,
306311
):
307312
# test all parameters:
308313
# - Use column, array or function as by= parameter
@@ -366,17 +371,24 @@ def test_against_frame_and_seriesgroupby(
366371
index_frame["gender"] = index_frame["both"].str.split("-").str.get(0)
367372
index_frame["education"] = index_frame["both"].str.split("-").str.get(1)
368373
del index_frame["both"]
369-
index_frame = index_frame.rename({0: None}, axis=1)
370-
expected.index = MultiIndex.from_frame(index_frame)
374+
index_frame2 = index_frame.rename({0: None}, axis=1)
375+
expected.index = MultiIndex.from_frame(index_frame2)
376+
377+
if index_frame2.columns.isna()[0]:
378+
# with using_infer_string, the columns in index_frame have string
379+
# dtype, which makes the rename({0: None}) above use np.nan
380+
# instead of None, so we need to set None more explicitly.
381+
expected.index.names = [None] + expected.index.names[1:]
371382
tm.assert_series_equal(result, expected)
372383
else:
373384
expected.insert(1, "gender", expected["both"].str.split("-").str.get(0))
374385
expected.insert(2, "education", expected["both"].str.split("-").str.get(1))
386+
if using_infer_string:
387+
expected = expected.astype({"gender": "str", "education": "str"})
375388
del expected["both"]
376389
tm.assert_frame_equal(result, expected)
377390

378391

379-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
380392
@pytest.mark.parametrize(
381393
"dtype",
382394
[
@@ -403,6 +415,7 @@ def test_compound(
403415
expected_count,
404416
expected_group_size,
405417
dtype,
418+
using_infer_string,
406419
):
407420
education_df = education_df.astype(dtype)
408421
education_df.columns = education_df.columns.astype(dtype)
@@ -425,6 +438,11 @@ def test_compound(
425438
expected["count"] = expected_count
426439
if dtype == "string[pyarrow]":
427440
expected["count"] = expected["count"].convert_dtypes()
441+
if using_infer_string and dtype == object:
442+
expected = expected.astype(
443+
{"country": "str", "gender": "str", "education": "str"}
444+
)
445+
428446
tm.assert_frame_equal(result, expected)
429447

430448

@@ -537,9 +555,6 @@ def names_with_nulls_df(nulls_fixture):
537555
)
538556

539557

540-
@pytest.mark.xfail(
541-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
542-
)
543558
@pytest.mark.parametrize(
544559
"dropna, expected_data, expected_index",
545560
[

0 commit comments

Comments
 (0)