7
7
import numpy as np
8
8
import pytest
9
9
10
- from pandas ._config import using_string_dtype
11
-
12
- from pandas .compat import HAS_PYARROW
13
10
import pandas .util ._test_decorators as td
14
11
15
12
from pandas import (
@@ -276,7 +273,6 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
276
273
return df [keys ].value_counts (normalize = normalize , sort = sort , ascending = ascending )
277
274
278
275
279
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
280
276
@pytest .mark .parametrize ("groupby" , ["column" , "array" , "function" ])
281
277
@pytest .mark .parametrize ("normalize, name" , [(True , "proportion" ), (False , "count" )])
282
278
@pytest .mark .parametrize (
@@ -289,7 +285,16 @@ def _frame_value_counts(df, keys, normalize, sort, ascending):
289
285
)
290
286
@pytest .mark .parametrize ("frame" , [True , False ])
291
287
def test_against_frame_and_seriesgroupby (
292
- education_df , groupby , normalize , name , sort , ascending , as_index , frame , request
288
+ education_df ,
289
+ groupby ,
290
+ normalize ,
291
+ name ,
292
+ sort ,
293
+ ascending ,
294
+ as_index ,
295
+ frame ,
296
+ request ,
297
+ using_infer_string ,
293
298
):
294
299
# test all parameters:
295
300
# - Use column, array or function as by= parameter
@@ -350,17 +355,24 @@ def test_against_frame_and_seriesgroupby(
350
355
index_frame ["gender" ] = index_frame ["both" ].str .split ("-" ).str .get (0 )
351
356
index_frame ["education" ] = index_frame ["both" ].str .split ("-" ).str .get (1 )
352
357
del index_frame ["both" ]
353
- index_frame = index_frame .rename ({0 : None }, axis = 1 )
354
- expected .index = MultiIndex .from_frame (index_frame )
358
+ index_frame2 = index_frame .rename ({0 : None }, axis = 1 )
359
+ expected .index = MultiIndex .from_frame (index_frame2 )
360
+
361
+ if index_frame2 .columns .isna ()[0 ]:
362
+ # with using_infer_string, the columns in index_frame are string
363
+ # dtype, which makes the rename({0: None}) above use np.nan
364
+ # instead of None, so we need to set None more explicitly.
365
+ expected .index .names = [None ] + expected .index .names [1 :]
355
366
tm .assert_series_equal (result , expected )
356
367
else :
357
368
expected .insert (1 , "gender" , expected ["both" ].str .split ("-" ).str .get (0 ))
358
369
expected .insert (2 , "education" , expected ["both" ].str .split ("-" ).str .get (1 ))
370
+ if using_infer_string :
371
+ expected = expected .astype ({"gender" : "str" , "education" : "str" })
359
372
del expected ["both" ]
360
373
tm .assert_frame_equal (result , expected )
361
374
362
375
363
- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
364
376
@pytest .mark .parametrize (
365
377
"dtype" ,
366
378
[
@@ -387,6 +399,7 @@ def test_compound(
387
399
expected_count ,
388
400
expected_group_size ,
389
401
dtype ,
402
+ using_infer_string ,
390
403
):
391
404
education_df = education_df .astype (dtype )
392
405
education_df .columns = education_df .columns .astype (dtype )
@@ -409,6 +422,11 @@ def test_compound(
409
422
expected ["count" ] = expected_count
410
423
if dtype == "string[pyarrow]" :
411
424
expected ["count" ] = expected ["count" ].convert_dtypes ()
425
+ if using_infer_string and dtype == object :
426
+ expected = expected .astype (
427
+ {"country" : "str" , "gender" : "str" , "education" : "str" }
428
+ )
429
+
412
430
tm .assert_frame_equal (result , expected )
413
431
414
432
@@ -501,9 +519,6 @@ def test_dropna_combinations(
501
519
tm .assert_series_equal (result , expected )
502
520
503
521
504
- @pytest .mark .xfail (
505
- using_string_dtype () and not HAS_PYARROW , reason = "TODO(infer_string)" , strict = False
506
- )
507
522
@pytest .mark .parametrize (
508
523
"dropna, expected_data, expected_index" ,
509
524
[
0 commit comments