diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 21c0c8f747b10..750a4c903176f 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -1,6 +1,4 @@ """ miscellaneous sorting / groupby utilities """ -import warnings - import numpy as np from pandas._libs import algos, hashtable, lib @@ -238,13 +236,15 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): handles NaNs. It adds ascending and na_position parameters. GH #6399, #5231 """ + from pandas.core.internals.arrays import extract_array + items = extract_array(items) + mask = np.asarray(isna(items)) # specially handle Categorical if is_categorical_dtype(items): if na_position not in {'first', 'last'}: raise ValueError('invalid na_position: {!r}'.format(na_position)) - mask = isna(items) cnt_null = mask.sum() sorted_idx = items.argsort(ascending=ascending, kind=kind) if ascending and na_position == 'last': @@ -255,15 +255,12 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): sorted_idx = np.roll(sorted_idx, cnt_null) return sorted_idx - with warnings.catch_warnings(): - # https://github.com/pandas-dev/pandas/issues/25439 - # can be removed once ExtensionArrays are properly handled by nargsort - warnings.filterwarnings( - "ignore", category=FutureWarning, - message="Converting timezone-aware DatetimeArray to") + if is_extension_array_dtype(items): + items = items._values_for_argsort() + else: items = np.asanyarray(items) + idx = np.arange(len(items)) - mask = isna(items) non_nans = items[~mask] non_nan_idx = idx[~mask] nan_idx = np.nonzero(mask)[0] diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 1852edaa9e748..5ec26d19c7758 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -2,6 +2,7 @@ import pytest import pandas as pd +from pandas.core.sorting import nargsort import pandas.util.testing as tm from .base import BaseExtensionTests @@ -44,6 +45,15 @@ def test_argsort_missing(self, data_missing_for_sorting): expected = pd.Series(np.array([1, -1, 0], dtype=np.int64)) self.assert_series_equal(result, expected) + @pytest.mark.parametrize('na_position, expected', [ + ('last', np.array([2, 0, 1], dtype='int64')), + ('first', np.array([1, 2, 0], dtype='int64')) + ]) + def test_nargsort(self, data_missing_for_sorting, na_position, expected): + # GH 25439 + result = nargsort(data_missing_for_sorting, na_position=na_position) + tm.assert_numpy_array_equal(result, expected) + @pytest.mark.parametrize('ascending', [True, False]) def test_sort_values(self, data_for_sorting, ascending): ser = pd.Series(data_for_sorting) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 75fa37eb9af09..f198fb6ae57b1 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -6,8 +6,7 @@ from numpy import nan import pytest -from pandas import ( - DataFrame, MultiIndex, Series, array, concat, merge, to_datetime) +from pandas import DataFrame, MultiIndex, Series, array, concat, merge from pandas.core import common as com from pandas.core.sorting import ( decons_group_index, get_group_index, is_int64_overflow_possible, @@ -181,13 +180,6 @@ def test_nargsort(self): exp = list(range(5)) + list(range(105, 110)) + list(range(104, 4, -1)) tm.assert_numpy_array_equal(result, np.array(exp), check_dtype=False) - def test_nargsort_datetimearray_warning(self): - # https://github.com/pandas-dev/pandas/issues/25439 - # can be removed once the FutureWarning for np.array(DTA) is removed - data = to_datetime([0, 2, 0, 1]).tz_localize('Europe/Brussels') - with tm.assert_produces_warning(None): - nargsort(data) - class TestMerge: