Skip to content

Commit 8fa78ec

Browse files
authored
TST (string) fix xfailed groupby tests (3) (#59642)
* TST (string) fix xfailed groupby tests (3)
* TST: non-pyarrow build
1 parent ad077aa commit 8fa78ec

File tree

4 files changed

+24
-35
lines changed

4 files changed

+24
-35
lines changed

pandas/tests/groupby/methods/test_describe.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas as pd
75
from pandas import (
86
DataFrame,
@@ -73,7 +71,6 @@ def test_series_describe_as_index(as_index, keys):
7371
tm.assert_frame_equal(result, expected)
7472

7573

76-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
7774
def test_frame_describe_multikey(tsframe):
7875
grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month])
7976
result = grouped.describe()
@@ -82,7 +79,7 @@ def test_frame_describe_multikey(tsframe):
8279
group = grouped[col].describe()
8380
# GH 17464 - Remove duplicate MultiIndex levels
8481
group_col = MultiIndex(
85-
levels=[[col], group.columns],
82+
levels=[Index([col], dtype=tsframe.columns.dtype), group.columns],
8683
codes=[[0] * len(group.columns), range(len(group.columns))],
8784
)
8885
group = DataFrame(group.values, columns=group_col, index=group.index)
@@ -249,7 +246,6 @@ def test_describe_non_cython_paths():
249246
tm.assert_frame_equal(result, expected)
250247

251248

252-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
253249
@pytest.mark.parametrize("dtype", [int, float, object])
254250
@pytest.mark.parametrize(
255251
"kwargs",
@@ -271,5 +267,5 @@ def test_groupby_empty_dataset(dtype, kwargs):
271267

272268
result = df.iloc[:0].groupby("A").B.describe(**kwargs)
273269
expected = df.groupby("A").B.describe(**kwargs).reset_index(drop=True).iloc[:0]
274-
expected.index = Index([])
270+
expected.index = Index([], dtype=df.columns.dtype)
275271
tm.assert_frame_equal(result, expected)

pandas/tests/groupby/methods/test_nth.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas as pd
75
from pandas import (
86
DataFrame,
@@ -679,14 +677,14 @@ def test_first_multi_key_groupby_categorical():
679677
tm.assert_frame_equal(result, expected)
680678

681679

682-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
683680
@pytest.mark.parametrize("method", ["first", "last", "nth"])
684681
def test_groupby_last_first_nth_with_none(method, nulls_fixture):
685682
# GH29645
686-
expected = Series(["y"])
683+
expected = Series(["y"], dtype=object)
687684
data = Series(
688685
[nulls_fixture, nulls_fixture, nulls_fixture, "y", nulls_fixture],
689686
index=[0, 0, 0, 0, 0],
687+
dtype=object,
690688
).groupby(level=0)
691689

692690
if method == "nth":

pandas/tests/groupby/test_groupby_dropna.py

-16
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
from pandas._config import using_string_dtype
55

6-
from pandas.compat import HAS_PYARROW
76
from pandas.compat.pyarrow import pa_version_under10p1
87

98
from pandas.core.dtypes.missing import na_value_for_dtype
@@ -13,9 +12,6 @@
1312
from pandas.tests.groupby import get_groupby_method_args
1413

1514

16-
@pytest.mark.xfail(
17-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
18-
)
1915
@pytest.mark.parametrize(
2016
"dropna, tuples, outputs",
2117
[
@@ -59,9 +55,6 @@ def test_groupby_dropna_multi_index_dataframe_nan_in_one_group(
5955
tm.assert_frame_equal(grouped, expected)
6056

6157

62-
@pytest.mark.xfail(
63-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
64-
)
6558
@pytest.mark.parametrize(
6659
"dropna, tuples, outputs",
6760
[
@@ -138,9 +131,6 @@ def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs):
138131
tm.assert_frame_equal(grouped, expected)
139132

140133

141-
@pytest.mark.xfail(
142-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
143-
)
144134
@pytest.mark.parametrize(
145135
"dropna, idx, expected",
146136
[
@@ -215,9 +205,6 @@ def test_groupby_dataframe_slice_then_transform(dropna, index):
215205
tm.assert_series_equal(result, expected)
216206

217207

218-
@pytest.mark.xfail(
219-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
220-
)
221208
@pytest.mark.parametrize(
222209
"dropna, tuples, outputs",
223210
[
@@ -299,9 +286,6 @@ def test_groupby_dropna_datetime_like_data(
299286
tm.assert_frame_equal(grouped, expected)
300287

301288

302-
@pytest.mark.xfail(
303-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
304-
)
305289
@pytest.mark.parametrize(
306290
"dropna, data, selected_data, levels",
307291
[

pandas/tests/groupby/transform/test_transform.py

+20-9
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pandas._config import using_string_dtype
77

88
from pandas._libs import lib
9+
from pandas.compat import HAS_PYARROW
910

1011
from pandas.core.dtypes.common import ensure_platform_int
1112

@@ -372,8 +373,7 @@ def test_transform_select_columns(df):
372373
tm.assert_frame_equal(result, expected)
373374

374375

375-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
376-
def test_transform_nuisance_raises(df):
376+
def test_transform_nuisance_raises(df, using_infer_string):
377377
# case that goes through _transform_item_by_item
378378

379379
df.columns = ["A", "B", "B", "D"]
@@ -383,10 +383,16 @@ def test_transform_nuisance_raises(df):
383383
grouped = df.groupby("A")
384384

385385
gbc = grouped["B"]
386-
with pytest.raises(TypeError, match="Could not convert"):
386+
msg = "Could not convert"
387+
if using_infer_string:
388+
if df.columns.dtype.storage == "pyarrow":
389+
msg = "with dtype str does not support operation 'mean'"
390+
else:
391+
msg = "Cannot perform reduction 'mean' with string dtype"
392+
with pytest.raises(TypeError, match=msg):
387393
gbc.transform(lambda x: np.mean(x))
388394

389-
with pytest.raises(TypeError, match="Could not convert"):
395+
with pytest.raises(TypeError, match=msg):
390396
df.groupby("A").transform(lambda x: np.mean(x))
391397

392398

@@ -445,8 +451,7 @@ def test_transform_coercion():
445451
tm.assert_frame_equal(result, expected)
446452

447453

448-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
449-
def test_groupby_transform_with_int():
454+
def test_groupby_transform_with_int(using_infer_string):
450455
# GH 3740, make sure that we might upcast on item-by-item transform
451456

452457
# floats
@@ -476,8 +481,14 @@ def test_groupby_transform_with_int():
476481
"D": "foo",
477482
}
478483
)
484+
msg = "Could not convert"
485+
if using_infer_string:
486+
if HAS_PYARROW:
487+
msg = "with dtype str does not support operation 'mean'"
488+
else:
489+
msg = "Cannot perform reduction 'mean' with string dtype"
479490
with np.errstate(all="ignore"):
480-
with pytest.raises(TypeError, match="Could not convert"):
491+
with pytest.raises(TypeError, match=msg):
481492
df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
482493
result = df.groupby("A")[["B", "C"]].transform(
483494
lambda x: (x - x.mean()) / x.std()
@@ -489,7 +500,7 @@ def test_groupby_transform_with_int():
489500
s = Series([2, 3, 4, 10, 5, -1])
490501
df = DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": s, "D": "foo"})
491502
with np.errstate(all="ignore"):
492-
with pytest.raises(TypeError, match="Could not convert"):
503+
with pytest.raises(TypeError, match=msg):
493504
df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
494505
result = df.groupby("A")[["B", "C"]].transform(
495506
lambda x: (x - x.mean()) / x.std()
@@ -705,7 +716,6 @@ def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target):
705716
tm.assert_frame_equal(result, expected)
706717

707718

708-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
709719
@pytest.mark.slow
710720
@pytest.mark.parametrize(
711721
"op, args, targop",
@@ -757,6 +767,7 @@ def test_cython_transform_frame_column(
757767
"does not support operation",
758768
".* is not supported for object dtype",
759769
"is not implemented for this dtype",
770+
".* is not supported for str dtype",
760771
]
761772
)
762773
with pytest.raises(TypeError, match=msg):

0 commit comments

Comments
 (0)