Skip to content

CLN: Moving Series.rank and DataFrame.rank to generic.py #11924

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,36 @@ Subtraction by ``Timedelta`` in a ``Series`` by a ``Timestamp`` works (:issue:`1
ser
pd.Timestamp('2012-01-01') - ser


Signature change for .rank
^^^^^^^^^^^^^^^^^^^^^^^^^^

``Series.rank`` and ``DataFrame.rank`` now have the same signature (:issue:`11759`)

Previous signature

.. code-block:: python

In [3]: pd.Series([0,1]).rank(method='average', na_option='keep', ascending=True, pct=False)
Out[3]:
0 1
1 2
dtype: float64

In [4]: pd.DataFrame([0,1]).rank(axis=0, numeric_only=None, method='average', na_option='keep', ascending=True, pct=False)
Out[4]:
0
0 1
1 2

New signature

.. ipython:: python

pd.Series([0,1]).rank(axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False)
pd.DataFrame([0,1]).rank(axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False)


Bug in QuarterBegin with n=0
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
49 changes: 0 additions & 49 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5005,55 +5005,6 @@ def f(arr, per, interpolation):
result.name = None # For groupby, so it can set an index name
return result

def rank(self, axis=0, numeric_only=None, method='average',
         na_option='keep', ascending=True, pct=False):
    """
    Rank the values of the frame (1 through n) along an axis. With the
    default method, tied values receive the average of their ranks

    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Ranks over columns (0) or rows (1)
    numeric_only : boolean, default None
        Include only float, int, boolean data. When None, the whole
        frame is ranked first; if that raises TypeError, only the
        numeric data is ranked
    method : {'average', 'min', 'max', 'first', 'dense'}
        * average: average rank of group
        * min: lowest rank in group
        * max: highest rank in group
        * first: ranks assigned in order they appear in the array
        * dense: like 'min', but rank always increases by 1 between groups
    na_option : {'keep', 'top', 'bottom'}
        * keep: leave NA values where they are
        * top: smallest rank if ascending
        * bottom: smallest rank if descending
    ascending : boolean, default True
        False for ranks by high (1) to low (N)
    pct : boolean, default False
        Computes percentage rank of data

    Returns
    -------
    ranks : DataFrame
    """
    axis = self._get_axis_number(axis)

    def _ranked(frame):
        # Rank the raw values and re-attach the labels of ``frame``.
        values = algos.rank(frame.values, axis=axis, method=method,
                            ascending=ascending, na_option=na_option,
                            pct=pct)
        return self._constructor(values, index=frame.index,
                                 columns=frame.columns)

    if numeric_only is None:
        # Optimistically rank everything; fall back to the numeric
        # subset when the data cannot be ranked as a whole.
        try:
            return _ranked(self)
        except TypeError:
            numeric_only = True

    source = self._get_numeric_data() if numeric_only else self
    return _ranked(source)

def to_timestamp(self, freq=None, how='start', axis=0, copy=True):
"""
Cast to DatetimeIndex of timestamps, at *beginning* of period
Expand Down
61 changes: 61 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pandas.tseries.index import DatetimeIndex
from pandas.tseries.period import PeriodIndex
from pandas.core.internals import BlockManager
import pandas.core.algorithms as algos
import pandas.core.common as com
import pandas.core.missing as mis
import pandas.core.datetools as datetools
Expand Down Expand Up @@ -3751,6 +3752,66 @@ def last(self, offset):
start = self.index.searchsorted(start_date, side='right')
return self.ix[start:]

def rank(self, axis=0, method='average', numeric_only=None,
         na_option='keep', ascending=True, pct=False):
    """
    Rank the values of the object (1 through n) along an axis. With the
    default method, tied values receive the average of their ranks

    Parameters
    ----------
    axis : {0 or 'index', 1 or 'columns'}, default 0
        index to direct ranking
    method : {'average', 'min', 'max', 'first', 'dense'}
        * average: average rank of group
        * min: lowest rank in group
        * max: highest rank in group
        * first: ranks assigned in order they appear in the array
        * dense: like 'min', but rank always increases by 1 between groups
    numeric_only : boolean, default None
        Include only float, int, boolean data. Valid only for DataFrame or
        Panel objects. When None, the whole object is ranked first; if
        that raises TypeError, only the numeric data is ranked
    na_option : {'keep', 'top', 'bottom'}
        * keep: leave NA values where they are
        * top: smallest rank if ascending
        * bottom: smallest rank if descending
    ascending : boolean, default True
        False for ranks by high (1) to low (N)
    pct : boolean, default False
        Computes percentage rank of data

    Returns
    -------
    ranks : same type as caller

    Raises
    ------
    NotImplementedError
        If the object has more than two dimensions
    """
    axis = self._get_axis_number(axis)

    if self.ndim > 2:
        raise NotImplementedError("rank does not make sense when ndim > 2")

    def _rank_of(obj):
        # Rank the raw values, rebuild an object carrying the axes of
        # ``obj`` and propagate metadata from the caller.
        values = algos.rank(obj.values, axis=axis, method=method,
                            ascending=ascending, na_option=na_option,
                            pct=pct)
        result = self._constructor(values, **obj._construct_axes_dict())
        return result.__finalize__(self)

    if numeric_only is None:
        # Optimistically rank everything; fall back to the numeric
        # subset when the data cannot be ranked as a whole.
        try:
            return _rank_of(self)
        except TypeError:
            numeric_only = True

    return _rank_of(self._get_numeric_data() if numeric_only else self)

_shared_docs['align'] = ("""
Align two object on their axes with the
specified join method for each axis Index
Expand Down
30 changes: 0 additions & 30 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1865,36 +1865,6 @@ def argsort(self, axis=0, kind='quicksort', order=None):
np.argsort(values, kind=kind), index=self.index,
dtype='int64').__finalize__(self)

def rank(self, method='average', na_option='keep', ascending=True,
         pct=False):
    """
    Rank the values of the Series (1 through n). With the default
    method, tied values receive the average of their ranks

    Parameters
    ----------
    method : {'average', 'min', 'max', 'first', 'dense'}
        * average: average rank of group
        * min: lowest rank in group
        * max: highest rank in group
        * first: ranks assigned in order they appear in the array
        * dense: like 'min', but rank always increases by 1 between groups
    na_option : {'keep'}
        keep: leave NA values where they are
    ascending : boolean, default True
        False for ranks by high (1) to low (N)
    pct : boolean, default False
        Computes percentage rank of data

    Returns
    -------
    ranks : Series
    """
    rank_values = algorithms.rank(self._values, method=method,
                                  na_option=na_option,
                                  ascending=ascending, pct=pct)
    # Re-attach the original index, then carry over the caller's metadata.
    ranked = self._constructor(rank_values, index=self.index)
    return ranked.__finalize__(self)

@deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
False: 'first'})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that we should have the signature:

def rank(self, method='average', axis=0, numeric_only=None, na_option='keep', ascending=True, pct=False)

this will be more consistent with other methods.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've modified the signature

def nlargest(self, n=5, keep='first'):
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,12 @@ def test_rank_na_option(self):
assert_almost_equal(ranks0.values, exp0)
assert_almost_equal(ranks1.values, exp1)

def test_rank_axis(self):
# check if using axes' names gives the same result
df = pd.DataFrame([[2, 1], [4, 3]])
assert_frame_equal(df.rank(axis=0), df.rank(axis='index'))
assert_frame_equal(df.rank(axis=1), df.rank(axis='columns'))

def test_sem(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add to the rank tests in tests/series/test_analytics.py a case simulating the original signature (and asserting an error)?
E.g. someone calls s.rank('average') without the keyword; this is just a back-compat check that it raises the correct kind of error.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x))
self._check_stat_op('sem', alt)
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/series/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -1025,6 +1025,11 @@ def test_rank(self):
iranks = iseries.rank()
assert_series_equal(iranks, exp)

def test_rank_signature(self):
s = Series([0, 1])
s.rank(method='average')
self.assertRaises(ValueError, s.rank, 'average')

def test_rank_inf(self):
raise nose.SkipTest('DataFrame.rank does not currently rank '
'np.inf and -np.inf properly')
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_rank_methods_series(self):
ts = Series(vals, index=index)

for m in ['average', 'min', 'max', 'first', 'dense']:
result = ts.rank(m)
result = ts.rank(method=m)
sprank = rankdata(vals, m if m != 'first' else 'ordinal')
tm.assert_series_equal(result, Series(sprank, index=index))

Expand Down