@@ -1057,59 +1057,77 @@ def test_rank(self):
1057
1057
iranks = iseries .rank ()
1058
1058
assert_series_equal (iranks , exp )
1059
1059
1060
+ def test_rank_categorical (self ):
1060
1061
# GH issue #15420 rank incorrectly orders ordered categories
1061
-
1062
+
1062
1063
# Test ascending/descending ranking for ordered categoricals
1063
1064
exp = pd .Series ([1. , 2. , 3. , 4. , 5. , 6. ])
1064
1065
exp_desc = pd .Series ([6. , 5. , 4. , 3. , 2. , 1. ])
1065
- ser = pd .Series (
1066
- ['first' , 'second' , 'third' , 'fourth' , 'fifth' , 'sixth' ]
1067
- )
1068
- ordered = ser .astype ('category' , ).cat .set_categories (
1066
+ ordered = pd .Categorical (
1067
+ ['first' , 'second' , 'third' , 'fourth' , 'fifth' , 'sixth' ],
1069
1068
['first' , 'second' , 'third' , 'fourth' , 'fifth' , 'sixth' ],
1070
1069
ordered = True
1071
1070
)
1072
1071
assert_series_equal (ordered .rank (), exp )
1073
1072
assert_series_equal (ordered .rank (ascending = False ), exp_desc )
1074
1073
1075
1074
# Unordered categoricals should be ranked as objects
1076
- unordered = ser .astype ('category' , ).cat .set_categories (
1077
- ['first' , 'second' , 'third' , 'fourth' , 'fifth' , 'sixth' ],
1075
+ unord_ser = pd .Series (['first' , 'second' , 'third' , 'fourth' ])
1076
+ unordered = pd .Categorical (
1077
+ ['first' , 'second' , 'third' , 'fourth' ],
1078
+ ['first' , 'second' , 'third' , 'fourth' ],
1078
1079
ordered = False
1079
1080
)
1080
1081
res = unordered .rank ()
1081
- assert_series_equal (res , unordered .astype (object ).rank ())
1082
+ assert_series_equal (res , unord_ser .astype (object ).rank ())
1082
1083
1083
1084
# Test na_option for rank data
1084
- na_ser = pd .Series (
1085
- ['first' , 'second' , 'third' , 'fourth' , 'fifth' , 'sixth' , np .NaN ]
1086
- ).astype ('category' , ).cat .set_categories (
1087
- [
1088
- 'first' , 'second' , 'third' , 'fourth' ,
1089
- 'fifth' , 'sixth' , 'seventh'
1090
- ],
1085
+ na_ser = pd .Categorical (
1086
+ ['first' , 'second' , 'third' , 'fourth' , 'fifth' , 'sixth' , np .NaN ],
1087
+ ['first' , 'second' , 'third' , 'fourth' , 'fifth' , 'sixth' ],
1091
1088
ordered = True
1092
1089
)
1093
1090
1094
1091
exp_top = pd .Series ([2. , 3. , 4. , 5. , 6. , 7. , 1. ])
1095
1092
exp_bot = pd .Series ([1. , 2. , 3. , 4. , 5. , 6. , 7. ])
1096
1093
exp_keep = pd .Series ([1. , 2. , 3. , 4. , 5. , 6. , np .NaN ])
1097
1094
1095
+ assert_series_equal (na_ser .rank (na_option = 'top' ), exp_top )
1096
+ assert_series_equal (na_ser .rank (na_option = 'bottom' ), exp_bot )
1097
+ assert_series_equal (na_ser .rank (na_option = 'keep' ), exp_keep )
1098
+
1099
+ # Test na_option for rank data with ascending False
1100
+ exp_top = pd .Series ([7. , 6. , 5. , 4. , 3. , 2. , 1. ])
1101
+ exp_bot = pd .Series ([6. , 5. , 4. , 3. , 2. , 1. , 7. ])
1102
+ exp_keep = pd .Series ([6. , 5. , 4. , 3. , 2. , 1. , np .NaN ])
1103
+
1098
1104
assert_series_equal (
1099
- na_ser .rank (na_option = 'top' ),
1105
+ na_ser .rank (na_option = 'top' , ascending = False ),
1100
1106
exp_top
1101
1107
)
1102
-
1103
1108
assert_series_equal (
1104
- na_ser .rank (na_option = 'bottom' ),
1109
+ na_ser .rank (na_option = 'bottom' , ascending = False ),
1105
1110
exp_bot
1106
1111
)
1107
-
1108
1112
assert_series_equal (
1109
- na_ser .rank (na_option = 'keep' ),
1113
+ na_ser .rank (na_option = 'keep' , ascending = False ),
1110
1114
exp_keep
1111
1115
)
1112
1116
1117
+ # Test with pct=True
1118
+ na_ser = pd .Categorical (
1119
+ ['first' , 'second' , 'third' , 'fourth' , np .NaN ],
1120
+ ['first' , 'second' , 'third' , 'fourth' ],
1121
+ ordered = True
1122
+ )
1123
+ exp_top = pd .Series ([0.4 , 0.6 , 0.8 , 1. , 0.2 ])
1124
+ exp_bot = pd .Series ([0.2 , 0.4 , 0.6 , 0.8 , 1. ])
1125
+ exp_keep = pd .Series ([0.25 , 0.5 , 0.75 , 1. , np .NaN ])
1126
+
1127
+ assert_series_equal (na_ser .rank (na_option = 'top' , pct = True ), exp_top )
1128
+ assert_series_equal (na_ser .rank (na_option = 'bottom' , pct = True ), exp_bot )
1129
+ assert_series_equal (na_ser .rank (na_option = 'keep' , pct = True ), exp_keep )
1130
+
1113
1131
def test_rank_signature (self ):
1114
1132
s = Series ([0 , 1 ])
1115
1133
s .rank (method = 'average' )
0 commit comments