Skip to content

Commit d64b8d8

Browse files
jbrockmendel authored and jorisvandenbossche committed
TST (string) fix xfailed groupby tests (3) (#59642)
* TST (string) fix xfailed groupby tests (3)
* TST: non-pyarrow build
1 parent 9cb66bf commit d64b8d8

File tree

4 files changed

+24
-35
lines changed

4 files changed

+24
-35
lines changed

pandas/tests/groupby/methods/test_describe.py

+2 -6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas as pd
75
from pandas import (
86
DataFrame,
@@ -73,7 +71,6 @@ def test_series_describe_as_index(as_index, keys):
7371
tm.assert_frame_equal(result, expected)
7472

7573

76-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
7774
def test_frame_describe_multikey(tsframe):
7875
grouped = tsframe.groupby([lambda x: x.year, lambda x: x.month])
7976
result = grouped.describe()
@@ -82,7 +79,7 @@ def test_frame_describe_multikey(tsframe):
8279
group = grouped[col].describe()
8380
# GH 17464 - Remove duplicate MultiIndex levels
8481
group_col = MultiIndex(
85-
levels=[[col], group.columns],
82+
levels=[Index([col], dtype=tsframe.columns.dtype), group.columns],
8683
codes=[[0] * len(group.columns), range(len(group.columns))],
8784
)
8885
group = DataFrame(group.values, columns=group_col, index=group.index)
@@ -275,7 +272,6 @@ def test_describe(self, df, gb, gni):
275272
tm.assert_frame_equal(result, expected)
276273

277274

278-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
279275
@pytest.mark.parametrize("dtype", [int, float, object])
280276
@pytest.mark.parametrize(
281277
"kwargs",
@@ -297,5 +293,5 @@ def test_groupby_empty_dataset(dtype, kwargs):
297293

298294
result = df.iloc[:0].groupby("A").B.describe(**kwargs)
299295
expected = df.groupby("A").B.describe(**kwargs).reset_index(drop=True).iloc[:0]
300-
expected.index = Index([])
296+
expected.index = Index([], dtype=df.columns.dtype)
301297
tm.assert_frame_equal(result, expected)

pandas/tests/groupby/methods/test_nth.py

+2 -4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas as pd
75
from pandas import (
86
DataFrame,
@@ -706,14 +704,14 @@ def test_first_multi_key_groupby_categorical():
706704
tm.assert_frame_equal(result, expected)
707705

708706

709-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
710707
@pytest.mark.parametrize("method", ["first", "last", "nth"])
711708
def test_groupby_last_first_nth_with_none(method, nulls_fixture):
712709
# GH29645
713-
expected = Series(["y"])
710+
expected = Series(["y"], dtype=object)
714711
data = Series(
715712
[nulls_fixture, nulls_fixture, nulls_fixture, "y", nulls_fixture],
716713
index=[0, 0, 0, 0, 0],
714+
dtype=object,
717715
).groupby(level=0)
718716

719717
if method == "nth":

pandas/tests/groupby/test_groupby_dropna.py

-16
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
from pandas._config import using_string_dtype
55

6-
from pandas.compat import HAS_PYARROW
76
from pandas.compat.pyarrow import pa_version_under10p1
87

98
from pandas.core.dtypes.missing import na_value_for_dtype
@@ -13,9 +12,6 @@
1312
from pandas.tests.groupby import get_groupby_method_args
1413

1514

16-
@pytest.mark.xfail(
17-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
18-
)
1915
@pytest.mark.parametrize(
2016
"dropna, tuples, outputs",
2117
[
@@ -59,9 +55,6 @@ def test_groupby_dropna_multi_index_dataframe_nan_in_one_group(
5955
tm.assert_frame_equal(grouped, expected)
6056

6157

62-
@pytest.mark.xfail(
63-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
64-
)
6558
@pytest.mark.parametrize(
6659
"dropna, tuples, outputs",
6760
[
@@ -138,9 +131,6 @@ def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs):
138131
tm.assert_frame_equal(grouped, expected)
139132

140133

141-
@pytest.mark.xfail(
142-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
143-
)
144134
@pytest.mark.parametrize(
145135
"dropna, idx, expected",
146136
[
@@ -216,9 +206,6 @@ def test_groupby_dataframe_slice_then_transform(dropna, index):
216206
tm.assert_series_equal(result, expected)
217207

218208

219-
@pytest.mark.xfail(
220-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
221-
)
222209
@pytest.mark.parametrize(
223210
"dropna, tuples, outputs",
224211
[
@@ -300,9 +287,6 @@ def test_groupby_dropna_datetime_like_data(
300287
tm.assert_frame_equal(grouped, expected)
301288

302289

303-
@pytest.mark.xfail(
304-
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
305-
)
306290
@pytest.mark.parametrize(
307291
"dropna, data, selected_data, levels",
308292
[

pandas/tests/groupby/transform/test_transform.py

+20 -9
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from pandas._config import using_string_dtype
66

77
from pandas._libs import lib
8+
from pandas.compat import HAS_PYARROW
89

910
from pandas.core.dtypes.common import ensure_platform_int
1011

@@ -499,8 +500,7 @@ def test_transform_select_columns(df):
499500
tm.assert_frame_equal(result, expected)
500501

501502

502-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
503-
def test_transform_nuisance_raises(df):
503+
def test_transform_nuisance_raises(df, using_infer_string):
504504
# case that goes through _transform_item_by_item
505505

506506
df.columns = ["A", "B", "B", "D"]
@@ -510,10 +510,16 @@ def test_transform_nuisance_raises(df):
510510
grouped = df.groupby("A")
511511

512512
gbc = grouped["B"]
513-
with pytest.raises(TypeError, match="Could not convert"):
513+
msg = "Could not convert"
514+
if using_infer_string:
515+
if df.columns.dtype.storage == "pyarrow":
516+
msg = "with dtype str does not support operation 'mean'"
517+
else:
518+
msg = "Cannot perform reduction 'mean' with string dtype"
519+
with pytest.raises(TypeError, match=msg):
514520
gbc.transform(lambda x: np.mean(x))
515521

516-
with pytest.raises(TypeError, match="Could not convert"):
522+
with pytest.raises(TypeError, match=msg):
517523
df.groupby("A").transform(lambda x: np.mean(x))
518524

519525

@@ -582,8 +588,7 @@ def test_transform_coercion():
582588
tm.assert_frame_equal(result, expected)
583589

584590

585-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
586-
def test_groupby_transform_with_int():
591+
def test_groupby_transform_with_int(using_infer_string):
587592
# GH 3740, make sure that we might upcast on item-by-item transform
588593

589594
# floats
@@ -613,8 +618,14 @@ def test_groupby_transform_with_int():
613618
"D": "foo",
614619
}
615620
)
621+
msg = "Could not convert"
622+
if using_infer_string:
623+
if HAS_PYARROW:
624+
msg = "with dtype str does not support operation 'mean'"
625+
else:
626+
msg = "Cannot perform reduction 'mean' with string dtype"
616627
with np.errstate(all="ignore"):
617-
with pytest.raises(TypeError, match="Could not convert"):
628+
with pytest.raises(TypeError, match=msg):
618629
df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
619630
result = df.groupby("A")[["B", "C"]].transform(
620631
lambda x: (x - x.mean()) / x.std()
@@ -626,7 +637,7 @@ def test_groupby_transform_with_int():
626637
s = Series([2, 3, 4, 10, 5, -1])
627638
df = DataFrame({"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": s, "D": "foo"})
628639
with np.errstate(all="ignore"):
629-
with pytest.raises(TypeError, match="Could not convert"):
640+
with pytest.raises(TypeError, match=msg):
630641
df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
631642
result = df.groupby("A")[["B", "C"]].transform(
632643
lambda x: (x - x.mean()) / x.std()
@@ -850,7 +861,6 @@ def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target):
850861
tm.assert_frame_equal(result, expected)
851862

852863

853-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
854864
@pytest.mark.slow
855865
@pytest.mark.parametrize(
856866
"op, args, targop",
@@ -901,6 +911,7 @@ def test_cython_transform_frame_column(
901911
"does not support .* operations",
902912
".* is not supported for object dtype",
903913
"is not implemented for this dtype",
914+
".* is not supported for str dtype",
904915
]
905916
)
906917
with pytest.raises(TypeError, match=msg):

0 commit comments

Comments
 (0)