@@ -33,7 +33,8 @@ def ser():
33
33
["max" , np .array ([2 , 6 , 7 , 4 , np .nan , 4 , 2 , 8 , np .nan , 6 ])],
34
34
["first" , np .array ([1 , 5 , 7 , 3 , np .nan , 4 , 2 , 8 , np .nan , 6 ])],
35
35
["dense" , np .array ([1 , 3 , 4 , 2 , np .nan , 2 , 1 , 5 , np .nan , 3 ])],
36
- ]
36
+ ],
37
+ ids = lambda x : x [0 ],
37
38
)
38
39
def results (request ):
39
40
return request .param
@@ -48,12 +49,29 @@ def results(request):
48
49
"Int64" ,
49
50
pytest .param ("float64[pyarrow]" , marks = td .skip_if_no ("pyarrow" )),
50
51
pytest .param ("int64[pyarrow]" , marks = td .skip_if_no ("pyarrow" )),
52
+ pytest .param ("string[pyarrow]" , marks = td .skip_if_no ("pyarrow" )),
53
+ "string[python]" ,
54
+ "str" ,
51
55
]
52
56
)
53
57
def dtype (request ):
54
58
return request .param
55
59
56
60
61
def expected_dtype(dtype, method, pct=False):
    """
    Return the name of the dtype the rank tests expect for a ranked Series.

    Parameters
    ----------
    dtype : str
        Name of the dtype the input Series was cast to before ranking.
    method : str
        Tie-breaking method passed to ``rank``.
    pct : bool, default False
        Whether ``rank`` was called with ``pct=True``.

    Returns
    -------
    str
        ``"Float64"`` for ``"string[pyarrow]"``. For ``"float64[pyarrow]"``
        and ``"int64[pyarrow]"``: ``"double[pyarrow]"`` when ``method`` is
        ``"average"`` or ``pct`` is truthy, otherwise ``"uint64[pyarrow]"``.
        ``"float64"`` for every other input.
    """
    if dtype == "string[pyarrow]":
        return "Float64"

    if dtype in ("float64[pyarrow]", "int64[pyarrow]"):
        # Only the "average" method or a percentage rank is expected to come
        # back as a double; the remaining methods are expected as uint64.
        if method == "average" or pct:
            return "double[pyarrow]"
        return "uint64[pyarrow]"

    return "float64"
73
+
74
+
57
75
class TestSeriesRank :
58
76
def test_rank (self , datetime_series ):
59
77
sp_stats = pytest .importorskip ("scipy.stats" )
@@ -241,12 +259,14 @@ def test_rank_signature(self):
241
259
with pytest .raises (ValueError , match = msg ):
242
260
s .rank ("average" )
243
261
244
def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
    """Compare ``rank(method=...)`` with the precomputed ranks for each dtype."""
    method, exp = results

    # Per the skip message below, these dtypes cannot hold the NaN entries of
    # the fixture; "str" is only skipped when ``using_infer_string`` is false.
    str_without_infer = dtype == "str" and not using_infer_string
    if dtype == "int64" or str_without_infer:
        pytest.skip("int64/str does not support NaN")

    if dtype is not None:
        ser = ser.astype(dtype)

    result = ser.rank(method=method)
    expected = Series(exp, dtype=expected_dtype(dtype, method))
    tm.assert_series_equal(result, expected)
250
270
251
271
@pytest .mark .parametrize ("ascending" , [True , False ])
252
272
@pytest .mark .parametrize ("method" , ["average" , "min" , "max" , "first" , "dense" ])
@@ -346,25 +366,35 @@ def test_rank_methods_series(self, method, op, value):
346
366
],
347
367
)
348
368
def test_rank_dense_method (self , dtype , ser , exp ):
369
+ if ser [0 ] < 0 and dtype .startswith ("str" ):
370
+ exp = exp [::- 1 ]
349
371
s = Series (ser ).astype (dtype )
350
372
result = s .rank (method = "dense" )
351
- expected = Series (exp ).astype (result . dtype )
373
+ expected = Series (exp ).astype (expected_dtype ( dtype , "dense" ) )
352
374
tm .assert_series_equal (result , expected )
353
375
354
def test_rank_descending(self, ser, results, dtype, using_infer_string):
    """Check that descending ranks equal ascending ranks of ``max - value``."""
    method, _ = results

    # For "int64" (and "str" when ``using_infer_string`` is false) the
    # fixture is not cast; its missing entries are dropped instead.
    drop_missing = dtype == "int64" or (dtype == "str" and not using_infer_string)
    s = ser.dropna() if drop_missing else ser.astype(dtype)

    res = s.rank(ascending=False)

    # For string dtypes the mirrored values are computed on a float64 cast.
    numeric = s.astype("float64") if dtype.startswith("str") else s
    mirrored = numeric.max() - numeric

    # Default tie-breaking ("average").
    expected = mirrored.rank()
    tm.assert_series_equal(res, expected.astype(expected_dtype(dtype, "average")))

    # Tie-breaking method supplied by the ``results`` fixture.
    expected = mirrored.rank(method=method)
    res2 = s.rank(method=method, ascending=False)
    tm.assert_series_equal(res2, expected.astype(expected_dtype(dtype, method)))
368
398
369
399
def test_rank_int (self , ser , results ):
370
400
method , exp = results
@@ -421,9 +451,11 @@ def test_rank_ea_small_values(self):
421
451
],
422
452
)
423
453
def test_rank_dense_pct (dtype , ser , exp ):
454
+ if ser [0 ] < 0 and dtype .startswith ("str" ):
455
+ exp = exp [::- 1 ]
424
456
s = Series (ser ).astype (dtype )
425
457
result = s .rank (method = "dense" , pct = True )
426
- expected = Series (exp ).astype (result . dtype )
458
+ expected = Series (exp ).astype (expected_dtype ( dtype , "dense" , pct = True ) )
427
459
tm .assert_series_equal (result , expected )
428
460
429
461
@@ -442,9 +474,11 @@ def test_rank_dense_pct(dtype, ser, exp):
442
474
],
443
475
)
444
476
def test_rank_min_pct (dtype , ser , exp ):
477
+ if ser [0 ] < 0 and dtype .startswith ("str" ):
478
+ exp = exp [::- 1 ]
445
479
s = Series (ser ).astype (dtype )
446
480
result = s .rank (method = "min" , pct = True )
447
- expected = Series (exp ).astype (result . dtype )
481
+ expected = Series (exp ).astype (expected_dtype ( dtype , "min" , pct = True ) )
448
482
tm .assert_series_equal (result , expected )
449
483
450
484
@@ -463,9 +497,11 @@ def test_rank_min_pct(dtype, ser, exp):
463
497
],
464
498
)
465
499
def test_rank_max_pct (dtype , ser , exp ):
500
+ if ser [0 ] < 0 and dtype .startswith ("str" ):
501
+ exp = exp [::- 1 ]
466
502
s = Series (ser ).astype (dtype )
467
503
result = s .rank (method = "max" , pct = True )
468
- expected = Series (exp ).astype (result . dtype )
504
+ expected = Series (exp ).astype (expected_dtype ( dtype , "max" , pct = True ) )
469
505
tm .assert_series_equal (result , expected )
470
506
471
507
@@ -484,9 +520,11 @@ def test_rank_max_pct(dtype, ser, exp):
484
520
],
485
521
)
486
522
def test_rank_average_pct (dtype , ser , exp ):
523
+ if ser [0 ] < 0 and dtype .startswith ("str" ):
524
+ exp = exp [::- 1 ]
487
525
s = Series (ser ).astype (dtype )
488
526
result = s .rank (method = "average" , pct = True )
489
- expected = Series (exp ).astype (result . dtype )
527
+ expected = Series (exp ).astype (expected_dtype ( dtype , "average" , pct = True ) )
490
528
tm .assert_series_equal (result , expected )
491
529
492
530
@@ -505,9 +543,11 @@ def test_rank_average_pct(dtype, ser, exp):
505
543
],
506
544
)
507
545
def test_rank_first_pct (dtype , ser , exp ):
546
+ if ser [0 ] < 0 and dtype .startswith ("str" ):
547
+ exp = exp [::- 1 ]
508
548
s = Series (ser ).astype (dtype )
509
549
result = s .rank (method = "first" , pct = True )
510
- expected = Series (exp ).astype (result . dtype )
550
+ expected = Series (exp ).astype (expected_dtype ( dtype , "first" , pct = True ) )
511
551
tm .assert_series_equal (result , expected )
512
552
513
553
0 commit comments