Skip to content

FIX: add support for desc order when ranking infs with nans #19538 #20091

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 30, 2018
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,7 @@ Numeric
- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
- Bug in :class:`DataFrame` flex arithmetic (e.g. ``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`)
- Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`)
- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` where ``ascending=False`` failed to return correct ranks for infinity if ``NaN`` values were present (:issue:`19538`)
- Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (:issue:`19873`)


Expand Down
6 changes: 3 additions & 3 deletions pandas/_libs/algos_rank_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,

sorted_data = values.take(_as)
sorted_mask = mask.take(_as)
_indices = order[1].take(_as).nonzero()[0]
_indices = np.diff(sorted_mask).nonzero()[0]
non_na_idx = _indices[0] if len(_indices) > 0 else -1
argsorted = _as.astype('i8')

Expand All @@ -153,7 +153,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,

if (i == n - 1 or
are_diff(util.get_value_at(sorted_data, i + 1), val) or
i == non_na_idx - 1):
i == non_na_idx):
if tiebreak == TIEBREAK_AVERAGE:
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = sum_ranks / dups
Expand Down Expand Up @@ -190,7 +190,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True,
count += 1.0

if (i == n - 1 or sorted_data[i + 1] != val or
i == non_na_idx - 1):
i == non_na_idx):
if tiebreak == TIEBREAK_AVERAGE:
for j in range(i - dups + 1, i + 1):
ranks[argsorted[j]] = sum_ranks / dups
Expand Down
60 changes: 38 additions & 22 deletions pandas/tests/series/test_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from pandas.tests.series.common import TestData
from pandas._libs.tslib import iNaT
from pandas._libs.algos import Infinity, NegInfinity
from itertools import chain
import pandas.util._test_decorators as td


class TestSeriesRank(TestData):
Expand Down Expand Up @@ -257,38 +259,52 @@ def _check(s, expected, method='average'):
series = s if dtype is None else s.astype(dtype)
_check(series, results[method], method=method)

def test_rank_tie_methods_on_infs_nans(self):
@td.skip_if_no_scipy
@pytest.mark.parametrize('ascending', [True, False])
@pytest.mark.parametrize('method', ['average', 'min', 'max', 'first',
'dense'])
@pytest.mark.parametrize('na_option', ['top', 'bottom', 'keep'])
def test_rank_tie_methods_on_infs_nans(self, method, na_option, ascending):
dtypes = [('object', None, Infinity(), NegInfinity()),
('float64', np.nan, np.inf, -np.inf)]
chunk = 3
disabled = set([('object', 'first')])

def _check(s, expected, method='average', na_option='keep'):
result = s.rank(method=method, na_option=na_option)
def _check(s, method, na_option, ascending):
exp_ranks = {
'average': ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
'min': ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
'max': ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
'first': ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
'dense': ([1, 1, 1], [2, 2, 2], [3, 3, 3])
}
ranks = exp_ranks[method]
if na_option == 'top':
order = [ranks[1], ranks[0], ranks[2]]
elif na_option == 'bottom':
order = [ranks[0], ranks[2], ranks[1]]
else:
order = [ranks[0], [np.nan] * chunk, ranks[1]]
expected = order if ascending else order[::-1]
expected = list(chain.from_iterable(expected))
result = s.rank(method=method, na_option=na_option,
ascending=ascending)
tm.assert_series_equal(result, Series(expected, dtype='float64'))

exp_ranks = {
'average': ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
'min': ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
'max': ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
'first': ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
'dense': ([1, 1, 1], [2, 2, 2], [3, 3, 3])
}
na_options = ('top', 'bottom', 'keep')
for dtype, na_value, pos_inf, neg_inf in dtypes:
in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
iseries = Series(in_arr, dtype=dtype)
for method, na_opt in product(exp_ranks.keys(), na_options):
ranks = exp_ranks[method]
if (dtype, method) in disabled:
continue
if na_opt == 'top':
order = ranks[1] + ranks[0] + ranks[2]
elif na_opt == 'bottom':
order = ranks[0] + ranks[2] + ranks[1]
else:
order = ranks[0] + [np.nan] * chunk + ranks[1]
_check(iseries, order, method, na_opt)
if (dtype, method) in disabled:
continue
_check(iseries, method, na_option, ascending)

def test_rank_desc_mix_nans_infs(self):
# GH 19538
# check descending ranking when mix nans and infs
iseries = Series([1, np.nan, np.inf, -np.inf, 25])
result = iseries.rank(ascending=False)
exp = Series([3, np.nan, 1, 4, 2], dtype='float64')
tm.assert_series_equal(result, exp)

def test_rank_methods_series(self):
pytest.importorskip('scipy.stats.special')
Expand Down