Skip to content

Commit 41abbe5

Browse files
nbonnotte authored and jreback committed
CLN: Moving Series.rank and DataFrame.rank to generic.py
closes #11924
1 parent f673af1 commit 41abbe5

File tree

8 files changed

+111
-80
lines changed

8 files changed

+111
-80
lines changed

doc/source/whatsnew/v0.18.0.txt

+35
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,41 @@ Subtraction by ``Timedelta`` in a ``Series`` by a ``Timestamp`` works (:issue:`1
280280
ser
281281
pd.Timestamp('2012-01-01') - ser
282282

283+
284+
Signature change for .rank
285+
^^^^^^^^^^^^^^^^^^^^^^^^^^
286+
287+
``Series.rank`` and ``DataFrame.rank`` now have the same signature (:issue:`11759`)
288+
289+
Previous signature
290+
291+
.. code-block:: python
292+
293+
In [3]: pd.Series([0,1]).rank(method='average', na_option='keep',
294+
ascending=True, pct=False)
295+
Out[3]:
296+
0 1
297+
1 2
298+
dtype: float64
299+
300+
In [4]: pd.DataFrame([0,1]).rank(axis=0, numeric_only=None,
301+
method='average', na_option='keep',
302+
ascending=True, pct=False)
303+
Out[4]:
304+
0
305+
0 1
306+
1 2
307+
308+
New signature
309+
310+
.. ipython:: python
311+
312+
pd.Series([0,1]).rank(axis=0, method='average', numeric_only=None,
313+
na_option='keep', ascending=True, pct=False)
314+
pd.DataFrame([0,1]).rank(axis=0, method='average', numeric_only=None,
315+
na_option='keep', ascending=True, pct=False)
316+
317+
283318
Bug in QuarterBegin with n=0
284319
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
285320

pandas/core/frame.py

-49
Original file line numberDiff line numberDiff line change
@@ -5001,55 +5001,6 @@ def f(arr, per, interpolation):
50015001
result.name = None # For groupby, so it can set an index name
50025002
return result
50035003

5004-
def rank(self, axis=0, numeric_only=None, method='average',
5005-
na_option='keep', ascending=True, pct=False):
5006-
"""
5007-
Compute numerical data ranks (1 through n) along axis. Equal values are
5008-
assigned a rank that is the average of the ranks of those values
5009-
5010-
Parameters
5011-
----------
5012-
axis : {0 or 'index', 1 or 'columns'}, default 0
5013-
Ranks over columns (0) or rows (1)
5014-
numeric_only : boolean, default None
5015-
Include only float, int, boolean data
5016-
method : {'average', 'min', 'max', 'first', 'dense'}
5017-
* average: average rank of group
5018-
* min: lowest rank in group
5019-
* max: highest rank in group
5020-
* first: ranks assigned in order they appear in the array
5021-
* dense: like 'min', but rank always increases by 1 between groups
5022-
na_option : {'keep', 'top', 'bottom'}
5023-
* keep: leave NA values where they are
5024-
* top: smallest rank if ascending
5025-
* bottom: smallest rank if descending
5026-
ascending : boolean, default True
5027-
False for ranks by high (1) to low (N)
5028-
pct : boolean, default False
5029-
Computes percentage rank of data
5030-
5031-
Returns
5032-
-------
5033-
ranks : DataFrame
5034-
"""
5035-
axis = self._get_axis_number(axis)
5036-
if numeric_only is None:
5037-
try:
5038-
ranks = algos.rank(self.values, axis=axis, method=method,
5039-
ascending=ascending, na_option=na_option,
5040-
pct=pct)
5041-
return self._constructor(ranks, index=self.index,
5042-
columns=self.columns)
5043-
except TypeError:
5044-
numeric_only = True
5045-
if numeric_only:
5046-
data = self._get_numeric_data()
5047-
else:
5048-
data = self
5049-
ranks = algos.rank(data.values, axis=axis, method=method,
5050-
ascending=ascending, na_option=na_option, pct=pct)
5051-
return self._constructor(ranks, index=data.index, columns=data.columns)
5052-
50535004
def to_timestamp(self, freq=None, how='start', axis=0, copy=True):
50545005
"""
50555006
Cast to DatetimeIndex of timestamps, at *beginning* of period

pandas/core/generic.py

+61
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from pandas.tseries.index import DatetimeIndex
1616
from pandas.tseries.period import PeriodIndex
1717
from pandas.core.internals import BlockManager
18+
import pandas.core.algorithms as algos
1819
import pandas.core.common as com
1920
import pandas.core.missing as mis
2021
import pandas.core.datetools as datetools
@@ -3751,6 +3752,66 @@ def last(self, offset):
37513752
start = self.index.searchsorted(start_date, side='right')
37523753
return self.ix[start:]
37533754

3755+
def rank(self, axis=0, method='average', numeric_only=None,
3756+
na_option='keep', ascending=True, pct=False):
3757+
"""
3758+
Compute numerical data ranks (1 through n) along axis. Equal values are
3759+
assigned a rank that is the average of the ranks of those values
3760+
3761+
Parameters
3762+
----------
3763+
axis : {0 or 'index', 1 or 'columns'}, default 0
3764+
Axis along which to rank
3765+
method : {'average', 'min', 'max', 'first', 'dense'}
3766+
* average: average rank of group
3767+
* min: lowest rank in group
3768+
* max: highest rank in group
3769+
* first: ranks assigned in order they appear in the array
3770+
* dense: like 'min', but rank always increases by 1 between groups
3771+
numeric_only : boolean, default None
3772+
Include only float, int, boolean data. Valid only for DataFrame
3773+
objects
3774+
na_option : {'keep', 'top', 'bottom'}
3775+
* keep: leave NA values where they are
3776+
* top: smallest rank if ascending
3777+
* bottom: smallest rank if descending
3778+
ascending : boolean, default True
3779+
False for ranks by high (1) to low (N)
3780+
pct : boolean, default False
3781+
Computes percentage rank of data
3782+
3783+
Returns
3784+
-------
3785+
ranks : same type as caller
3786+
"""
3787+
axis = self._get_axis_number(axis)
3788+
3789+
if self.ndim > 2:
3790+
msg = "rank does not make sense when ndim > 2"
3791+
raise NotImplementedError(msg)
3792+
3793+
def ranker(data):
3794+
ranks = algos.rank(data.values, axis=axis, method=method,
3795+
ascending=ascending, na_option=na_option,
3796+
pct=pct)
3797+
ranks = self._constructor(ranks, **data._construct_axes_dict())
3798+
return ranks.__finalize__(self)
3799+
3800+
# if numeric_only is None and ranking all the data raises TypeError,
3801+
# retry with numeric_only=True
3802+
if numeric_only is None:
3803+
try:
3804+
return ranker(self)
3805+
except TypeError:
3806+
numeric_only = True
3807+
3808+
if numeric_only:
3809+
data = self._get_numeric_data()
3810+
else:
3811+
data = self
3812+
3813+
return ranker(data)
3814+
37543815
_shared_docs['align'] = ("""
37553816
Align two object on their axes with the
37563817
specified join method for each axis Index

pandas/core/series.py

-30
Original file line numberDiff line numberDiff line change
@@ -1865,36 +1865,6 @@ def argsort(self, axis=0, kind='quicksort', order=None):
18651865
np.argsort(values, kind=kind), index=self.index,
18661866
dtype='int64').__finalize__(self)
18671867

1868-
def rank(self, method='average', na_option='keep', ascending=True,
1869-
pct=False):
1870-
"""
1871-
Compute data ranks (1 through n). Equal values are assigned a rank that
1872-
is the average of the ranks of those values
1873-
1874-
Parameters
1875-
----------
1876-
method : {'average', 'min', 'max', 'first', 'dense'}
1877-
* average: average rank of group
1878-
* min: lowest rank in group
1879-
* max: highest rank in group
1880-
* first: ranks assigned in order they appear in the array
1881-
* dense: like 'min', but rank always increases by 1 between groups
1882-
na_option : {'keep'}
1883-
keep: leave NA values where they are
1884-
ascending : boolean, default True
1885-
False for ranks by high (1) to low (N)
1886-
pct : boolean, default False
1887-
Computes percentage rank of data
1888-
1889-
Returns
1890-
-------
1891-
ranks : Series
1892-
"""
1893-
ranks = algorithms.rank(self._values, method=method,
1894-
na_option=na_option, ascending=ascending,
1895-
pct=pct)
1896-
return self._constructor(ranks, index=self.index).__finalize__(self)
1897-
18981868
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
18991869
False: 'first'})
19001870
def nlargest(self, n=5, keep='first'):

pandas/tests/frame/test_analytics.py

+6
Original file line numberDiff line numberDiff line change
@@ -875,6 +875,12 @@ def test_rank_na_option(self):
875875
assert_almost_equal(ranks0.values, exp0)
876876
assert_almost_equal(ranks1.values, exp1)
877877

878+
def test_rank_axis(self):
879+
# check if using axes' names gives the same result
880+
df = pd.DataFrame([[2, 1], [4, 3]])
881+
assert_frame_equal(df.rank(axis=0), df.rank(axis='index'))
882+
assert_frame_equal(df.rank(axis=1), df.rank(axis='columns'))
883+
878884
def test_sem(self):
879885
alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x))
880886
self._check_stat_op('sem', alt)

pandas/tests/series/test_analytics.py

+5
Original file line numberDiff line numberDiff line change
@@ -1025,6 +1025,11 @@ def test_rank(self):
10251025
iranks = iseries.rank()
10261026
assert_series_equal(iranks, exp)
10271027

1028+
def test_rank_signature(self):
1029+
s = Series([0, 1])
1030+
s.rank(method='average')
1031+
self.assertRaises(ValueError, s.rank, 'average')
1032+
10281033
def test_rank_inf(self):
10291034
raise nose.SkipTest('DataFrame.rank does not currently rank '
10301035
'np.inf and -np.inf properly')

pandas/tests/test_panel.py

+3
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ def test_pickle(self):
5656
unpickled = self.round_trip_pickle(self.panel)
5757
assert_frame_equal(unpickled['ItemA'], self.panel['ItemA'])
5858

59+
def test_rank(self):
60+
self.assertRaises(NotImplementedError, lambda: self.panel.rank())
61+
5962
def test_cumsum(self):
6063
cumsum = self.panel.cumsum()
6164
assert_frame_equal(cumsum['ItemA'], self.panel['ItemA'].cumsum())

pandas/tests/test_stats.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def test_rank_methods_series(self):
5959
ts = Series(vals, index=index)
6060

6161
for m in ['average', 'min', 'max', 'first', 'dense']:
62-
result = ts.rank(m)
62+
result = ts.rank(method=m)
6363
sprank = rankdata(vals, m if m != 'first' else 'ordinal')
6464
tm.assert_series_equal(result, Series(sprank, index=index))
6565

0 commit comments

Comments
 (0)