return values for rank from categorical object

jeet63 · jeet63 · commit 062db8b65cce · 2017-02-18T21:36:02.000+05:30
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -578,11 +578,11 @@ Bug Fixes
 
 
 
+- Bug in ``.rank()`` which incorrectly ranked ordered categories (:issue:`15420`)
 
 
 
 - Bug in ``DataFrame.boxplot`` where ``fontsize`` was not applied to the tick labels on both axes (:issue:`15108`)
 - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`)
 - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`)
 - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`)
-- Bug in ``.rank()`` rank incorrectly orders ordered categories
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -620,10 +620,7 @@ def rank(values, axis=0, method='average', na_option='keep',
         Whether or not to the display the returned rankings in integer form
         (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1).
     """
-    if is_categorical(values):
-        ranks = values._rank(axis=axis, method=method, ascending=ascending,
-                             na_option=na_option, pct=pct)
-    elif values.ndim == 1:
+    if values.ndim == 1:
         f, values = _get_data_algo(values, _rank1d_functions)
         ranks = f(values, ties_method=method, ascending=ascending,
                   na_option=na_option, pct=pct)
@@ -991,6 +988,9 @@ def _get_data_algo(values, func_map):
     elif is_unsigned_integer_dtype(values):
         f = func_map['uint64']
         values = _ensure_uint64(values)
+    elif is_categorical(values) and values._ordered:
+        f = func_map['float64']
+        values = values._values_for_rank()
     else:
         values = _ensure_object(values)
 
diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -1364,28 +1364,23 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'):
             return self._constructor(values=codes, categories=self.categories,
                                      ordered=self.ordered, fastpath=True)
 
-    def _rank(self, *args, **kwargs):
+    def _values_for_rank(self):
         """
         For correctly ranking ordered categorical data. See GH#15420
 
         Ordered categorical data should be ranked on the basis of
-        codes.
+        codes, with the missing-value code (-1) translated to NaN.
 
         Returns
         -------
         numpy array
 
         """
-        from pandas.core.algorithms import rank
-
+        values = self._codes.astype('float64')
         if self._ordered:
-            codes = self._codes.astype('float64')
-            na_mask = (codes == -1)
-            codes[na_mask] = np.nan
-            ranks = rank(codes, *args, **kwargs)
-        else:
-            ranks = rank(self.astype('object'), *args, **kwargs)
-        return ranks
+            na_mask = (values == -1)
+            values[na_mask] = np.nan
+        return values
 
     def order(self, inplace=False, ascending=True, na_position='last'):
         """
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
@@ -1079,8 +1079,9 @@ def test_rank_categorical(self):
             ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'],
             ordered=False
         )
+        exp_unordered = pd.Series([2., 4., 6., 3., 1., 5.])
         res = unordered.rank()
-        assert_series_equal(res, unordered.astype(object).rank())
+        assert_series_equal(res, exp_unordered)
 
         # Test na_option for rank data
         na_ser = pd.Series(