@@ -33,7 +33,8 @@ def ser():
33
33
["max" , np .array ([2 , 6 , 7 , 4 , np .nan , 4 , 2 , 8 , np .nan , 6 ])],
34
34
["first" , np .array ([1 , 5 , 7 , 3 , np .nan , 4 , 2 , 8 , np .nan , 6 ])],
35
35
["dense" , np .array ([1 , 3 , 4 , 2 , np .nan , 2 , 1 , 5 , np .nan , 3 ])],
36
- ]
36
+ ],
37
+ ids = lambda x : x [0 ],
37
38
)
38
39
def results (request ):
39
40
return request .param
@@ -48,12 +49,29 @@ def results(request):
48
49
"Int64" ,
49
50
pytest .param ("float64[pyarrow]" , marks = td .skip_if_no ("pyarrow" )),
50
51
pytest .param ("int64[pyarrow]" , marks = td .skip_if_no ("pyarrow" )),
52
+ pytest .param ("string[pyarrow]" , marks = td .skip_if_no ("pyarrow" )),
53
+ "string[python]" ,
54
+ "str" ,
51
55
]
52
56
)
53
57
def dtype (request ):
54
58
return request .param
55
59
56
60
61
def expected_dtype(dtype, method, pct=False):
    """
    Return the dtype name the rank tests expect for a ranked Series.

    Parameters
    ----------
    dtype : str or None
        Dtype the input Series was cast to before calling ``rank``.
    method : str
        Tie-breaking method passed to ``rank`` (e.g. "average", "min").
    pct : bool, default False
        Whether ``rank`` was called with ``pct=True``.

    Returns
    -------
    str
        * "Float64" when ``dtype`` is "string[pyarrow]";
        * "double[pyarrow]" for "float64[pyarrow]"/"int64[pyarrow]" input
          when ``method`` is "average" or ``pct`` is true, otherwise
          "uint64[pyarrow]";
        * "float64" for every other input dtype.
    """
    if dtype == "string[pyarrow]":
        return "Float64"

    if dtype in ("float64[pyarrow]", "int64[pyarrow]"):
        # Averaged or percentage ranks map to the floating pyarrow dtype;
        # every other method maps to the unsigned integer pyarrow dtype.
        if method == "average" or pct:
            return "double[pyarrow]"
        return "uint64[pyarrow]"

    return "float64"
73
+
74
+
57
75
class TestSeriesRank :
58
76
def test_rank (self , datetime_series ):
59
77
sp_stats = pytest .importorskip ("scipy.stats" )
@@ -251,12 +269,14 @@ def test_rank_signature(self):
251
269
with pytest .raises (ValueError , match = msg ):
252
270
s .rank ("average" )
253
271
254
def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
    """Compare ``rank(method=...)`` with the precomputed ranks for that method."""
    method, exp = results

    # "str" is only skipped when string inference is switched off.
    str_without_inference = dtype == "str" and not using_infer_string
    if dtype == "int64" or str_without_inference:
        pytest.skip("int64/str does not support NaN")

    if dtype is not None:
        ser = ser.astype(dtype)

    ranked = ser.rank(method=method)
    expected = Series(exp, dtype=expected_dtype(dtype, method))
    tm.assert_series_equal(ranked, expected)
260
280
261
281
@pytest .mark .parametrize ("na_option" , ["top" , "bottom" , "keep" ])
262
282
@pytest .mark .parametrize (
@@ -357,25 +377,35 @@ def test_rank_methods_series(self, rank_method, op, value):
357
377
],
358
378
)
359
379
def test_rank_dense_method (self , dtype , ser , exp ):
380
+ if ser [0 ] < 0 and dtype .startswith ("str" ):
381
+ exp = exp [::- 1 ]
360
382
s = Series (ser ).astype (dtype )
361
383
result = s .rank (method = "dense" )
362
- expected = Series (exp ).astype (result . dtype )
384
+ expected = Series (exp ).astype (expected_dtype ( dtype , "dense" ) )
363
385
tm .assert_series_equal (result , expected )
364
386
365
def test_rank_descending(self, ser, results, dtype, using_infer_string):
    """Descending ranks must equal ascending ranks of ``max - value``."""
    method, _ = results

    # int64, and "str" when string inference is off, use the NaN-free
    # series without casting; every other dtype is cast directly.
    use_dropna = dtype == "int64" or (dtype == "str" and not using_infer_string)
    s = ser.dropna() if use_dropna else ser.astype(dtype)

    res = s.rank(ascending=False)

    # String-like dtypes are converted to float64 before building the
    # mirrored series (presumably because subtraction is not available on
    # string data -- confirm against the string array implementations).
    numeric = s.astype("float64") if dtype.startswith("str") else s
    mirrored = numeric.max() - numeric

    expected = mirrored.rank()
    tm.assert_series_equal(res, expected.astype(expected_dtype(dtype, "average")))

    expected = mirrored.rank(method=method)
    res2 = s.rank(method=method, ascending=False)
    tm.assert_series_equal(res2, expected.astype(expected_dtype(dtype, method)))
379
409
380
410
def test_rank_int (self , ser , results ):
381
411
method , exp = results
@@ -432,9 +462,11 @@ def test_rank_ea_small_values(self):
432
462
],
433
463
)
434
464
def test_rank_dense_pct (dtype , ser , exp ):
465
+ if ser [0 ] < 0 and dtype .startswith ("str" ):
466
+ exp = exp [::- 1 ]
435
467
s = Series (ser ).astype (dtype )
436
468
result = s .rank (method = "dense" , pct = True )
437
- expected = Series (exp ).astype (result . dtype )
469
+ expected = Series (exp ).astype (expected_dtype ( dtype , "dense" , pct = True ) )
438
470
tm .assert_series_equal (result , expected )
439
471
440
472
@@ -453,9 +485,11 @@ def test_rank_dense_pct(dtype, ser, exp):
453
485
],
454
486
)
455
487
def test_rank_min_pct (dtype , ser , exp ):
488
+ if ser [0 ] < 0 and dtype .startswith ("str" ):
489
+ exp = exp [::- 1 ]
456
490
s = Series (ser ).astype (dtype )
457
491
result = s .rank (method = "min" , pct = True )
458
- expected = Series (exp ).astype (result . dtype )
492
+ expected = Series (exp ).astype (expected_dtype ( dtype , "min" , pct = True ) )
459
493
tm .assert_series_equal (result , expected )
460
494
461
495
@@ -474,9 +508,11 @@ def test_rank_min_pct(dtype, ser, exp):
474
508
],
475
509
)
476
510
def test_rank_max_pct (dtype , ser , exp ):
511
+ if ser [0 ] < 0 and dtype .startswith ("str" ):
512
+ exp = exp [::- 1 ]
477
513
s = Series (ser ).astype (dtype )
478
514
result = s .rank (method = "max" , pct = True )
479
- expected = Series (exp ).astype (result . dtype )
515
+ expected = Series (exp ).astype (expected_dtype ( dtype , "max" , pct = True ) )
480
516
tm .assert_series_equal (result , expected )
481
517
482
518
@@ -495,9 +531,11 @@ def test_rank_max_pct(dtype, ser, exp):
495
531
],
496
532
)
497
533
def test_rank_average_pct (dtype , ser , exp ):
534
+ if ser [0 ] < 0 and dtype .startswith ("str" ):
535
+ exp = exp [::- 1 ]
498
536
s = Series (ser ).astype (dtype )
499
537
result = s .rank (method = "average" , pct = True )
500
- expected = Series (exp ).astype (result . dtype )
538
+ expected = Series (exp ).astype (expected_dtype ( dtype , "average" , pct = True ) )
501
539
tm .assert_series_equal (result , expected )
502
540
503
541
@@ -516,9 +554,11 @@ def test_rank_average_pct(dtype, ser, exp):
516
554
],
517
555
)
518
556
def test_rank_first_pct (dtype , ser , exp ):
557
+ if ser [0 ] < 0 and dtype .startswith ("str" ):
558
+ exp = exp [::- 1 ]
519
559
s = Series (ser ).astype (dtype )
520
560
result = s .rank (method = "first" , pct = True )
521
- expected = Series (exp ).astype (result . dtype )
561
+ expected = Series (exp ).astype (expected_dtype ( dtype , "first" , pct = True ) )
522
562
tm .assert_series_equal (result , expected )
523
563
524
564
0 commit comments