diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index e83f149db1f18..2a08ca847b39d 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -1022,6 +1022,7 @@ Numeric - Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`) - Bug in :class:`DataFrame` flex arithmetic (e.g. ``df.add(other, fill_value=foo)``) with a ``fill_value`` other than ``None`` failed to raise ``NotImplementedError`` in corner cases where either the frame or ``other`` has length zero (:issue:`19522`) - Multiplication and division of numeric-dtyped :class:`Index` objects with timedelta-like scalars returns ``TimedeltaIndex`` instead of raising ``TypeError`` (:issue:`19333`) +- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``ascending=False`` failed to return correct ranks for infinity if ``NaN`` values were present (:issue:`19538`) - Bug where ``NaN`` was returned instead of 0 by :func:`Series.pct_change` and :func:`DataFrame.pct_change` when ``fill_method`` is not ``None`` (:issue:`19873`) diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index 9348d7525c307..b2551f3733904 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -135,7 +135,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True, sorted_data = values.take(_as) sorted_mask = mask.take(_as) - _indices = order[1].take(_as).nonzero()[0] + _indices = np.diff(sorted_mask).nonzero()[0] non_na_idx = _indices[0] if len(_indices) > 0 else -1 argsorted = _as.astype('i8') @@ -153,7 +153,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True, if (i == n - 1 or are_diff(util.get_value_at(sorted_data, i + 1), val) or - i == non_na_idx - 1): + i == non_na_idx): if tiebreak == TIEBREAK_AVERAGE: for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = sum_ranks / 
dups @@ -190,7 +190,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True, count += 1.0 if (i == n - 1 or sorted_data[i + 1] != val or - i == non_na_idx - 1): + i == non_na_idx): if tiebreak == TIEBREAK_AVERAGE: for j in range(i - dups + 1, i + 1): ranks[argsorted[j]] = sum_ranks / dups diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index d15325ca8ef0e..004e42e14cb93 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -16,6 +16,8 @@ from pandas.tests.series.common import TestData from pandas._libs.tslib import iNaT from pandas._libs.algos import Infinity, NegInfinity +from itertools import chain +import pandas.util._test_decorators as td class TestSeriesRank(TestData): @@ -257,38 +259,52 @@ def _check(s, expected, method='average'): series = s if dtype is None else s.astype(dtype) _check(series, results[method], method=method) - def test_rank_tie_methods_on_infs_nans(self): + @td.skip_if_no_scipy + @pytest.mark.parametrize('ascending', [True, False]) + @pytest.mark.parametrize('method', ['average', 'min', 'max', 'first', + 'dense']) + @pytest.mark.parametrize('na_option', ['top', 'bottom', 'keep']) + def test_rank_tie_methods_on_infs_nans(self, method, na_option, ascending): dtypes = [('object', None, Infinity(), NegInfinity()), ('float64', np.nan, np.inf, -np.inf)] chunk = 3 disabled = set([('object', 'first')]) - def _check(s, expected, method='average', na_option='keep'): - result = s.rank(method=method, na_option=na_option) + def _check(s, method, na_option, ascending): + exp_ranks = { + 'average': ([2, 2, 2], [5, 5, 5], [8, 8, 8]), + 'min': ([1, 1, 1], [4, 4, 4], [7, 7, 7]), + 'max': ([3, 3, 3], [6, 6, 6], [9, 9, 9]), + 'first': ([1, 2, 3], [4, 5, 6], [7, 8, 9]), + 'dense': ([1, 1, 1], [2, 2, 2], [3, 3, 3]) + } + ranks = exp_ranks[method] + if na_option == 'top': + order = [ranks[1], ranks[0], ranks[2]] + elif na_option == 'bottom': + order = [ranks[0], ranks[2], 
ranks[1]] + else: + order = [ranks[0], [np.nan] * chunk, ranks[1]] + expected = order if ascending else order[::-1] + expected = list(chain.from_iterable(expected)) + result = s.rank(method=method, na_option=na_option, + ascending=ascending) tm.assert_series_equal(result, Series(expected, dtype='float64')) - exp_ranks = { - 'average': ([2, 2, 2], [5, 5, 5], [8, 8, 8]), - 'min': ([1, 1, 1], [4, 4, 4], [7, 7, 7]), - 'max': ([3, 3, 3], [6, 6, 6], [9, 9, 9]), - 'first': ([1, 2, 3], [4, 5, 6], [7, 8, 9]), - 'dense': ([1, 1, 1], [2, 2, 2], [3, 3, 3]) - } - na_options = ('top', 'bottom', 'keep') for dtype, na_value, pos_inf, neg_inf in dtypes: in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk iseries = Series(in_arr, dtype=dtype) - for method, na_opt in product(exp_ranks.keys(), na_options): - ranks = exp_ranks[method] - if (dtype, method) in disabled: - continue - if na_opt == 'top': - order = ranks[1] + ranks[0] + ranks[2] - elif na_opt == 'bottom': - order = ranks[0] + ranks[2] + ranks[1] - else: - order = ranks[0] + [np.nan] * chunk + ranks[1] - _check(iseries, order, method, na_opt) + if (dtype, method) in disabled: + continue + _check(iseries, method, na_option, ascending) + + def test_rank_desc_mix_nans_infs(self): + # GH 19538 + # check descending ranking when mix nans and infs + iseries = Series([1, np.nan, np.inf, -np.inf, 25]) + result = iseries.rank(ascending=False) + exp = Series([3, np.nan, 1, 4, 2], dtype='float64') + tm.assert_series_equal(result, exp) def test_rank_methods_series(self): pytest.importorskip('scipy.stats.special')