Skip to content

Commit 5ad25d0

Browse files
authored
TST (string): fix xfailed groupby value_counts tests (#59632)
1 parent 7750f49 commit 5ad25d0

File tree

1 file changed

+26
-11
lines changed

1 file changed

+26
-11
lines changed

pandas/tests/groupby/methods/test_value_counts.py

+26-11
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77
import numpy as np
88
import pytest
99

10-
from pandas._config import using_string_dtype
11-
12-
from pandas.compat import HAS_PYARROW
1310
import pandas.util._test_decorators as td
1411

1512
from pandas import (
@@ -276,7 +273,6 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
276273
return df[keys].value_counts(normalize=normalize, sort=sort, ascending=ascending)
277274

278275

279-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
280276
@pytest.mark.parametrize("groupby", ["column", "array", "function"])
281277
@pytest.mark.parametrize("normalize, name", [(True, "proportion"), (False, "count")])
282278
@pytest.mark.parametrize(
@@ -289,7 +285,16 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
289285
)
290286
@pytest.mark.parametrize("frame", [True, False])
291287
def test_against_frame_and_seriesgroupby(
292-
education_df, groupby, normalize, name, sort, ascending, as_index, frame, request
288+
education_df,
289+
groupby,
290+
normalize,
291+
name,
292+
sort,
293+
ascending,
294+
as_index,
295+
frame,
296+
request,
297+
using_infer_string,
293298
):
294299
# test all parameters:
295300
# - Use column, array or function as by= parameter
@@ -350,17 +355,24 @@ def test_against_frame_and_seriesgroupby(
350355
index_frame["gender"] = index_frame["both"].str.split("-").str.get(0)
351356
index_frame["education"] = index_frame["both"].str.split("-").str.get(1)
352357
del index_frame["both"]
353-
index_frame = index_frame.rename({0: None}, axis=1)
354-
expected.index = MultiIndex.from_frame(index_frame)
358+
index_frame2 = index_frame.rename({0: None}, axis=1)
359+
expected.index = MultiIndex.from_frame(index_frame2)
360+
361+
if index_frame2.columns.isna()[0]:
362+
# with using_infer_string, the columns in index_frame are string
363+
# dtype, which makes the rename({0: None}) above use np.nan
364+
# instead of None, so we need to set None more explicitly.
365+
expected.index.names = [None] + expected.index.names[1:]
355366
tm.assert_series_equal(result, expected)
356367
else:
357368
expected.insert(1, "gender", expected["both"].str.split("-").str.get(0))
358369
expected.insert(2, "education", expected["both"].str.split("-").str.get(1))
370+
if using_infer_string:
371+
expected = expected.astype({"gender": "str", "education": "str"})
359372
del expected["both"]
360373
tm.assert_frame_equal(result, expected)
361374

362375

363-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
364376
@pytest.mark.parametrize(
365377
"dtype",
366378
[
@@ -387,6 +399,7 @@ def test_compound(
387399
expected_count,
388400
expected_group_size,
389401
dtype,
402+
using_infer_string,
390403
):
391404
education_df = education_df.astype(dtype)
392405
education_df.columns = education_df.columns.astype(dtype)
@@ -409,6 +422,11 @@ def test_compound(
409422
expected["count"] = expected_count
410423
if dtype == "string[pyarrow]":
411424
expected["count"] = expected["count"].convert_dtypes()
425+
if using_infer_string and dtype == object:
426+
expected = expected.astype(
427+
{"country": "str", "gender": "str", "education": "str"}
428+
)
429+
412430
tm.assert_frame_equal(result, expected)
413431

414432

@@ -501,9 +519,6 @@ def test_dropna_combinations(
501519
tm.assert_series_equal(result, expected)
502520

503521

504-
@pytest.mark.xfail(
505-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
506-
)
507522
@pytest.mark.parametrize(
508523
"dropna, expected_data, expected_index",
509524
[

0 commit comments

Comments
 (0)