CLN: Moving Series.rank and DataFrame.rank to generic.py

nbonnotte · jreback · commit 41abbe5b97fd · 2016-01-28T16:41:06.000-05:00
closes #11924
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
@@ -280,6 +280,41 @@ Subtraction by ``Timedelta`` in a ``Series`` by a ``Timestamp`` works (:issue:`1
    ser
    pd.Timestamp('2012-01-01') - ser
 
+
+Signature change for .rank
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``Series.rank`` and ``DataFrame.rank`` now have the same signature (:issue:`11759`)
+
+Previous signature
+
+.. code-block:: python
+
+   In [3]: pd.Series([0,1]).rank(method='average', na_option='keep',
+                                 ascending=True, pct=False)
+   Out[3]:
+   0    1
+   1    2
+   dtype: float64
+
+   In [4]: pd.DataFrame([0,1]).rank(axis=0, numeric_only=None,
+                                    method='average', na_option='keep',
+                                    ascending=True, pct=False)
+   Out[4]:
+      0
+   0  1
+   1  2
+
+New signature
+
+.. ipython:: python
+
+   pd.Series([0,1]).rank(axis=0, method='average', numeric_only=None,
+                         na_option='keep', ascending=True, pct=False)
+   pd.DataFrame([0,1]).rank(axis=0, method='average', numeric_only=None,
+                            na_option='keep', ascending=True, pct=False)
+
+
 Bug in QuarterBegin with n=0
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -5001,55 +5001,6 @@ def f(arr, per, interpolation):
             result.name = None  # For groupby, so it can set an index name
         return result
 
-    def rank(self, axis=0, numeric_only=None, method='average',
-             na_option='keep', ascending=True, pct=False):
-        """
-        Compute numerical data ranks (1 through n) along axis. Equal values are
-        assigned a rank that is the average of the ranks of those values
-
-        Parameters
-        ----------
-        axis : {0 or 'index', 1 or 'columns'}, default 0
-            Ranks over columns (0) or rows (1)
-        numeric_only : boolean, default None
-            Include only float, int, boolean data
-        method : {'average', 'min', 'max', 'first', 'dense'}
-            * average: average rank of group
-            * min: lowest rank in group
-            * max: highest rank in group
-            * first: ranks assigned in order they appear in the array
-            * dense: like 'min', but rank always increases by 1 between groups
-        na_option : {'keep', 'top', 'bottom'}
-            * keep: leave NA values where they are
-            * top: smallest rank if ascending
-            * bottom: smallest rank if descending
-        ascending : boolean, default True
-            False for ranks by high (1) to low (N)
-        pct : boolean, default False
-            Computes percentage rank of data
-
-        Returns
-        -------
-        ranks : DataFrame
-        """
-        axis = self._get_axis_number(axis)
-        if numeric_only is None:
-            try:
-                ranks = algos.rank(self.values, axis=axis, method=method,
-                                   ascending=ascending, na_option=na_option,
-                                   pct=pct)
-                return self._constructor(ranks, index=self.index,
-                                         columns=self.columns)
-            except TypeError:
-                numeric_only = True
-        if numeric_only:
-            data = self._get_numeric_data()
-        else:
-            data = self
-        ranks = algos.rank(data.values, axis=axis, method=method,
-                           ascending=ascending, na_option=na_option, pct=pct)
-        return self._constructor(ranks, index=data.index, columns=data.columns)
-
     def to_timestamp(self, freq=None, how='start', axis=0, copy=True):
         """
         Cast to DatetimeIndex of timestamps, at *beginning* of period
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -15,6 +15,7 @@
 from pandas.tseries.index import DatetimeIndex
 from pandas.tseries.period import PeriodIndex
 from pandas.core.internals import BlockManager
+import pandas.core.algorithms as algos
 import pandas.core.common as com
 import pandas.core.missing as mis
 import pandas.core.datetools as datetools
@@ -3751,6 +3752,66 @@ def last(self, offset):
         start = self.index.searchsorted(start_date, side='right')
         return self.ix[start:]
 
+    def rank(self, axis=0, method='average', numeric_only=None,
+             na_option='keep', ascending=True, pct=False):
+        """
+        Compute numerical data ranks (1 through n) along axis. By default,
+        equal values are assigned the average of the ranks of those values.
+
+        Parameters
+        ----------
+        axis : {0 or 'index', 1 or 'columns'}, default 0
+            Axis along which the ranks are computed
+        method : {'average', 'min', 'max', 'first', 'dense'}
+            * average: average rank of group
+            * min: lowest rank in group
+            * max: highest rank in group
+            * first: ranks assigned in order they appear in the array
+            * dense: like 'min', but rank always increases by 1 between groups
+        numeric_only : boolean, default None
+            Include only float, int, boolean data. Valid only for DataFrame
+            objects
+        na_option : {'keep', 'top', 'bottom'}
+            * keep: leave NA values where they are
+            * top: smallest rank if ascending
+            * bottom: smallest rank if descending
+        ascending : boolean, default True
+            False for ranks by high (1) to low (N)
+        pct : boolean, default False
+            Computes percentage rank of data
+
+        Returns
+        -------
+        ranks : same type as caller
+        """
+        axis = self._get_axis_number(axis)
+
+        if self.ndim > 2:
+            msg = "rank does not make sense when ndim > 2"
+            raise NotImplementedError(msg)
+
+        def ranker(data):
+            ranks = algos.rank(data.values, axis=axis, method=method,
+                               ascending=ascending, na_option=na_option,
+                               pct=pct)
+            ranks = self._constructor(ranks, **data._construct_axes_dict())
+            return ranks.__finalize__(self)
+
+        # if numeric_only is None, try to rank all the data first; if that
+        # raises a TypeError, fall back to numeric_only=True
+        if numeric_only is None:
+            try:
+                return ranker(self)
+            except TypeError:
+                numeric_only = True
+
+        if numeric_only:
+            data = self._get_numeric_data()
+        else:
+            data = self
+
+        return ranker(data)
+
     _shared_docs['align'] = ("""
         Align two object on their axes with the
         specified join method for each axis Index
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1865,36 +1865,6 @@ def argsort(self, axis=0, kind='quicksort', order=None):
                 np.argsort(values, kind=kind), index=self.index,
                 dtype='int64').__finalize__(self)
 
-    def rank(self, method='average', na_option='keep', ascending=True,
-             pct=False):
-        """
-        Compute data ranks (1 through n). Equal values are assigned a rank that
-        is the average of the ranks of those values
-
-        Parameters
-        ----------
-        method : {'average', 'min', 'max', 'first', 'dense'}
-            * average: average rank of group
-            * min: lowest rank in group
-            * max: highest rank in group
-            * first: ranks assigned in order they appear in the array
-            * dense: like 'min', but rank always increases by 1 between groups
-        na_option : {'keep'}
-            keep: leave NA values where they are
-        ascending : boolean, default True
-            False for ranks by high (1) to low (N)
-        pct : boolean, default False
-            Computes percentage rank of data
-
-        Returns
-        -------
-        ranks : Series
-        """
-        ranks = algorithms.rank(self._values, method=method,
-                                na_option=na_option, ascending=ascending,
-                                pct=pct)
-        return self._constructor(ranks, index=self.index).__finalize__(self)
-
     @deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
                                                    False: 'first'})
     def nlargest(self, n=5, keep='first'):
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -875,6 +875,12 @@ def test_rank_na_option(self):
         assert_almost_equal(ranks0.values, exp0)
         assert_almost_equal(ranks1.values, exp1)
 
+    def test_rank_axis(self):
+        # check if using axes' names gives the same result
+        df = pd.DataFrame([[2, 1], [4, 3]])
+        assert_frame_equal(df.rank(axis=0), df.rank(axis='index'))
+        assert_frame_equal(df.rank(axis=1), df.rank(axis='columns'))
+
     def test_sem(self):
         alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x))
         self._check_stat_op('sem', alt)
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
@@ -1025,6 +1025,11 @@ def test_rank(self):
         iranks = iseries.rank()
         assert_series_equal(iranks, exp)
 
+    def test_rank_signature(self):
+        s = Series([0, 1])
+        s.rank(method='average')
+        self.assertRaises(ValueError, s.rank, 'average')
+
     def test_rank_inf(self):
         raise nose.SkipTest('DataFrame.rank does not currently rank '
                             'np.inf and -np.inf properly')
diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py
@@ -56,6 +56,9 @@ def test_pickle(self):
         unpickled = self.round_trip_pickle(self.panel)
         assert_frame_equal(unpickled['ItemA'], self.panel['ItemA'])
 
+    def test_rank(self):
+        self.assertRaises(NotImplementedError, lambda: self.panel.rank())
+
     def test_cumsum(self):
         cumsum = self.panel.cumsum()
         assert_frame_equal(cumsum['ItemA'], self.panel['ItemA'].cumsum())
diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py
@@ -59,7 +59,7 @@ def test_rank_methods_series(self):
             ts = Series(vals, index=index)
 
             for m in ['average', 'min', 'max', 'first', 'dense']:
-                result = ts.rank(m)
+                result = ts.rank(method=m)
                 sprank = rankdata(vals, m if m != 'first' else 'ordinal')
                 tm.assert_series_equal(result, Series(sprank, index=index))