8
8
import numpy as np
9
9
import pytest
10
10
11
- from pandas ._config import using_string_dtype
12
-
13
- from pandas .compat import HAS_PYARROW
14
11
import pandas .util ._test_decorators as td
15
12
16
13
from pandas import (
@@ -288,7 +285,6 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
288
285
return df [keys ].value_counts (normalize = normalize , sort = sort , ascending = ascending )
289
286
290
287
291
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
292
288
@pytest .mark .parametrize ("groupby" , ["column" , "array" , "function" ])
293
289
@pytest .mark .parametrize ("normalize, name" , [(True , "proportion" ), (False , "count" )])
294
290
@pytest .mark .parametrize (
@@ -302,7 +298,16 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
302
298
@pytest .mark .parametrize ("as_index" , [True , False ])
303
299
@pytest .mark .parametrize ("frame" , [True , False ])
304
300
def test_against_frame_and_seriesgroupby (
305
- education_df , groupby , normalize , name , sort , ascending , as_index , frame , request
301
+ education_df ,
302
+ groupby ,
303
+ normalize ,
304
+ name ,
305
+ sort ,
306
+ ascending ,
307
+ as_index ,
308
+ frame ,
309
+ request ,
310
+ using_infer_string ,
306
311
):
307
312
# test all parameters:
308
313
# - Use column, array or function as by= parameter
@@ -366,17 +371,24 @@ def test_against_frame_and_seriesgroupby(
366
371
index_frame ["gender" ] = index_frame ["both" ].str .split ("-" ).str .get (0 )
367
372
index_frame ["education" ] = index_frame ["both" ].str .split ("-" ).str .get (1 )
368
373
del index_frame ["both" ]
369
- index_frame = index_frame .rename ({0 : None }, axis = 1 )
370
- expected .index = MultiIndex .from_frame (index_frame )
374
+ index_frame2 = index_frame .rename ({0 : None }, axis = 1 )
375
+ expected .index = MultiIndex .from_frame (index_frame2 )
376
+
377
+ if index_frame2 .columns .isna ()[0 ]:
378
+ # with using_infer_string, the columns in index_frame are string
379
+ # dtype, which makes the rename({0: None}) above use np.nan
380
+ # instead of None, so we need to set None more explicitly.
381
+ expected .index .names = [None ] + expected .index .names [1 :]
371
382
tm .assert_series_equal (result , expected )
372
383
else :
373
384
expected .insert (1 , "gender" , expected ["both" ].str .split ("-" ).str .get (0 ))
374
385
expected .insert (2 , "education" , expected ["both" ].str .split ("-" ).str .get (1 ))
386
+ if using_infer_string :
387
+ expected = expected .astype ({"gender" : "str" , "education" : "str" })
375
388
del expected ["both" ]
376
389
tm .assert_frame_equal (result , expected )
377
390
378
391
379
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
380
392
@pytest .mark .parametrize (
381
393
"dtype" ,
382
394
[
@@ -403,6 +415,7 @@ def test_compound(
403
415
expected_count ,
404
416
expected_group_size ,
405
417
dtype ,
418
+ using_infer_string ,
406
419
):
407
420
education_df = education_df .astype (dtype )
408
421
education_df .columns = education_df .columns .astype (dtype )
@@ -425,6 +438,11 @@ def test_compound(
425
438
expected ["count" ] = expected_count
426
439
if dtype == "string[pyarrow]" :
427
440
expected ["count" ] = expected ["count" ].convert_dtypes ()
441
+ if using_infer_string and dtype == object :
442
+ expected = expected .astype (
443
+ {"country" : "str" , "gender" : "str" , "education" : "str" }
444
+ )
445
+
428
446
tm .assert_frame_equal (result , expected )
429
447
430
448
@@ -537,9 +555,6 @@ def names_with_nulls_df(nulls_fixture):
537
555
)
538
556
539
557
540
- @pytest .mark .xfail (
541
- using_string_dtype () and not HAS_PYARROW , reason = "TODO(infer_string)" , strict = False
542
- )
543
558
@pytest .mark .parametrize (
544
559
"dropna, expected_data, expected_index" ,
545
560
[
0 commit comments