6
6
from pandas ._config import using_string_dtype
7
7
8
8
from pandas ._libs import lib
9
+ from pandas .compat import HAS_PYARROW
9
10
10
11
from pandas .core .dtypes .common import ensure_platform_int
11
12
@@ -372,8 +373,7 @@ def test_transform_select_columns(df):
372
373
tm .assert_frame_equal (result , expected )
373
374
374
375
375
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
376
- def test_transform_nuisance_raises (df ):
376
+ def test_transform_nuisance_raises (df , using_infer_string ):
377
377
# case that goes through _transform_item_by_item
378
378
379
379
df .columns = ["A" , "B" , "B" , "D" ]
@@ -383,10 +383,16 @@ def test_transform_nuisance_raises(df):
383
383
grouped = df .groupby ("A" )
384
384
385
385
gbc = grouped ["B" ]
386
- with pytest .raises (TypeError , match = "Could not convert" ):
386
+ msg = "Could not convert"
387
+ if using_infer_string :
388
+ if df .columns .dtype .storage == "pyarrow" :
389
+ msg = "with dtype str does not support operation 'mean'"
390
+ else :
391
+ msg = "Cannot perform reduction 'mean' with string dtype"
392
+ with pytest .raises (TypeError , match = msg ):
387
393
gbc .transform (lambda x : np .mean (x ))
388
394
389
- with pytest .raises (TypeError , match = "Could not convert" ):
395
+ with pytest .raises (TypeError , match = msg ):
390
396
df .groupby ("A" ).transform (lambda x : np .mean (x ))
391
397
392
398
@@ -445,8 +451,7 @@ def test_transform_coercion():
445
451
tm .assert_frame_equal (result , expected )
446
452
447
453
448
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
449
- def test_groupby_transform_with_int ():
454
+ def test_groupby_transform_with_int (using_infer_string ):
450
455
# GH 3740, make sure that we might upcast on item-by-item transform
451
456
452
457
# floats
@@ -476,8 +481,14 @@ def test_groupby_transform_with_int():
476
481
"D" : "foo" ,
477
482
}
478
483
)
484
+ msg = "Could not convert"
485
+ if using_infer_string :
486
+ if HAS_PYARROW :
487
+ msg = "with dtype str does not support operation 'mean'"
488
+ else :
489
+ msg = "Cannot perform reduction 'mean' with string dtype"
479
490
with np .errstate (all = "ignore" ):
480
- with pytest .raises (TypeError , match = "Could not convert" ):
491
+ with pytest .raises (TypeError , match = msg ):
481
492
df .groupby ("A" ).transform (lambda x : (x - x .mean ()) / x .std ())
482
493
result = df .groupby ("A" )[["B" , "C" ]].transform (
483
494
lambda x : (x - x .mean ()) / x .std ()
@@ -489,7 +500,7 @@ def test_groupby_transform_with_int():
489
500
s = Series ([2 , 3 , 4 , 10 , 5 , - 1 ])
490
501
df = DataFrame ({"A" : [1 , 1 , 1 , 2 , 2 , 2 ], "B" : 1 , "C" : s , "D" : "foo" })
491
502
with np .errstate (all = "ignore" ):
492
- with pytest .raises (TypeError , match = "Could not convert" ):
503
+ with pytest .raises (TypeError , match = msg ):
493
504
df .groupby ("A" ).transform (lambda x : (x - x .mean ()) / x .std ())
494
505
result = df .groupby ("A" )[["B" , "C" ]].transform (
495
506
lambda x : (x - x .mean ()) / x .std ()
@@ -705,7 +716,6 @@ def test_cython_transform_frame(request, op, args, targop, df_fix, gb_target):
705
716
tm .assert_frame_equal (result , expected )
706
717
707
718
708
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
709
719
@pytest .mark .slow
710
720
@pytest .mark .parametrize (
711
721
"op, args, targop" ,
@@ -757,6 +767,7 @@ def test_cython_transform_frame_column(
757
767
"does not support operation" ,
758
768
".* is not supported for object dtype" ,
759
769
"is not implemented for this dtype" ,
770
+ ".* is not supported for str dtype" ,
760
771
]
761
772
)
762
773
with pytest .raises (TypeError , match = msg ):
0 commit comments