Skip to content

[ENH] nargsort handles EA with its _values_for_argsort #26854

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Jun 26, 2019
17 changes: 7 additions & 10 deletions pandas/core/sorting.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
""" miscellaneous sorting / groupby utilities """
import warnings

import numpy as np

from pandas._libs import algos, hashtable, lib
Expand Down Expand Up @@ -238,13 +236,15 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'):
handles NaNs. It adds ascending and na_position parameters.
GH #6399, #5231
"""
from pandas.core.internals.arrays import extract_array

items = extract_array(items)
mask = np.asarray(isna(items))
# specially handle Categorical
if is_categorical_dtype(items):
if na_position not in {'first', 'last'}:
raise ValueError('invalid na_position: {!r}'.format(na_position))

mask = isna(items)
cnt_null = mask.sum()
sorted_idx = items.argsort(ascending=ascending, kind=kind)
if ascending and na_position == 'last':
Expand All @@ -255,15 +255,12 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'):
sorted_idx = np.roll(sorted_idx, cnt_null)
return sorted_idx

with warnings.catch_warnings():
# https://github.com/pandas-dev/pandas/issues/25439
# can be removed once ExtensionArrays are properly handled by nargsort
warnings.filterwarnings(
"ignore", category=FutureWarning,
message="Converting timezone-aware DatetimeArray to")
if is_extension_array_dtype(items):
items = items._values_for_argsort()
else:
items = np.asanyarray(items)

idx = np.arange(len(items))
mask = isna(items)
non_nans = items[~mask]
non_nan_idx = idx[~mask]
nan_idx = np.nonzero(mask)[0]
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/extension/base/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytest

import pandas as pd
from pandas.core.sorting import nargsort
import pandas.util.testing as tm

from .base import BaseExtensionTests
Expand Down Expand Up @@ -44,6 +45,15 @@ def test_argsort_missing(self, data_missing_for_sorting):
expected = pd.Series(np.array([1, -1, 0], dtype=np.int64))
self.assert_series_equal(result, expected)

@pytest.mark.parametrize('na_position, expected', [
('last', np.array([2, 0, 1], dtype='int64')),
('first', np.array([1, 2, 0], dtype='int64'))
])
def test_nargsort(self, data_missing_for_sorting, na_position, expected):
# GH 25439
result = nargsort(data_missing_for_sorting, na_position=na_position)
tm.assert_numpy_array_equal(result, expected)

@pytest.mark.parametrize('ascending', [True, False])
def test_sort_values(self, data_for_sorting, ascending):
ser = pd.Series(data_for_sorting)
Expand Down
10 changes: 1 addition & 9 deletions pandas/tests/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
from numpy import nan
import pytest

from pandas import (
DataFrame, MultiIndex, Series, array, concat, merge, to_datetime)
from pandas import DataFrame, MultiIndex, Series, array, concat, merge
from pandas.core import common as com
from pandas.core.sorting import (
decons_group_index, get_group_index, is_int64_overflow_possible,
Expand Down Expand Up @@ -181,13 +180,6 @@ def test_nargsort(self):
exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1))
tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False)

def test_nargsort_datetimearray_warning(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should we not leave this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW, I don't see the warning on master. It seems to have changed in the meantime. Probably fine to just remove.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure that's cool

# https://github.com/pandas-dev/pandas/issues/25439
# can be removed once the FutureWarning for np.array(DTA) is removed
data = to_datetime([0, 2, 0, 1]).tz_localize('Europe/Brussels')
with tm.assert_produces_warning(None):
nargsort(data)


class TestMerge:

Expand Down