-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
API: ExtensionArray.argsort places the missing value at the end #26354
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 20 commits
c22c01b
93ac589
b401cba
aa50eed
d38a5e1
bae30fd
effe18c
d1dd5fa
bf1e927
1925423
c57dff7
16cff4e
bc713cd
b10e5f9
7811a08
c172ba1
d9660b1
c333e8c
315e6f2
ebdfac2
09638f4
0dc48a4
7186d9d
84f87a8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
|
||
from pandas._typing import ArrayLike | ||
from pandas.core import ops | ||
from pandas.core.sorting import nargsort | ||
|
||
_not_implemented_message = "{} does not implement {}." | ||
|
||
|
@@ -364,13 +365,11 @@ def isna(self) -> ArrayLike: | |
def _values_for_argsort(self) -> np.ndarray: | ||
""" | ||
Return values for sorting. | ||
|
||
Returns | ||
------- | ||
ndarray | ||
The transformed values should maintain the ordering between values | ||
within the array. | ||
|
||
See Also | ||
-------- | ||
ExtensionArray.argsort | ||
|
@@ -403,13 +402,13 @@ def argsort(self, ascending=True, kind='quicksort', *args, **kwargs): | |
""" | ||
# Implementor note: You have two places to override the behavior of | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you augment the doc-string for argsort / _values_for_argsort to say that NaNs are placed at the end. |
||
# argsort. | ||
# 1. _values_for_argsort : construct the values passed to np.argsort | ||
# 1. _values_for_argsort : construct the values passed to np.argsort. | ||
# 2. argsort : total control over sorting. | ||
ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) | ||
|
||
values = self._values_for_argsort() | ||
result = np.argsort(values, kind=kind, **kwargs) | ||
if not ascending: | ||
result = result[::-1] | ||
result = nargsort(values, kind=kind, ascending=ascending, | ||
na_position='last') | ||
return result | ||
|
||
def fillna(self, value=None, method=None, limit=None): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -501,23 +501,6 @@ def value_counts(self, dropna=True): | |
|
||
return Series(array, index=index) | ||
|
||
def _values_for_argsort(self) -> np.ndarray: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does we have a benchmark for sorting Int64? so does this change things? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can't found the benchmark for
master:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add this benchmark (make the arrays longer though) |
||
"""Return values for sorting. | ||
|
||
Returns | ||
------- | ||
ndarray | ||
The transformed values should maintain the ordering between values | ||
within the array. | ||
|
||
See Also | ||
-------- | ||
ExtensionArray.argsort | ||
""" | ||
data = self._data.copy() | ||
data[self._mask] = data.min() - 1 | ||
return data | ||
|
||
@classmethod | ||
def _create_comparison_method(cls, op): | ||
def cmp_method(self, other): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -193,6 +193,11 @@ def test_searchsorted(self, data_for_sorting): | |
if not data_for_sorting.ordered: | ||
raise pytest.skip(reason="searchsorted requires ordered data.") | ||
|
||
def test_argsort_nan_last(self, data_missing_for_sorting): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why is this being skipped? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The The corresponding test for |
||
# GH 21801 | ||
# TODO: Categorical.argsort places NA values at the end | ||
pass | ||
|
||
|
||
class TestCasting(base.BaseCastingTests): | ||
pass | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,7 +7,8 @@ | |
import pytest | ||
|
||
from pandas import ( | ||
DataFrame, MultiIndex, Series, array, concat, merge, to_datetime) | ||
Categorical, DataFrame, MultiIndex, Series, array, concat, merge, | ||
to_datetime) | ||
from pandas.core import common as com | ||
from pandas.core.sorting import ( | ||
decons_group_index, get_group_index, is_int64_overflow_possible, | ||
|
@@ -188,6 +189,14 @@ def test_nargsort_datetimearray_warning(self): | |
with tm.assert_produces_warning(None): | ||
nargsort(data) | ||
|
||
@pytest.mark.parametrize('data', [ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually instead of here, can you add the test in pandas/tests/extension/base/missing.py (and it will be generic with the data_missing fixure) |
||
Categorical(['a', 'c', 'a', 'b']), | ||
to_datetime([0, 2, 0, 1]).tz_localize('Europe/Brussels')]) | ||
def test_nargsort_extension_array(self, data): | ||
result = nargsort(data) | ||
expected = np.array([0, 2, 3, 1], dtype=np.intp) | ||
tm.assert_numpy_array_equal(result, expected) | ||
|
||
|
||
class TestMerge: | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why can't we leave this and just enhance the signature, passing
na_position
? then the default implementation will just workThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_values_for_argsort
is added backed to ExtensionArray interface. Actually,_values_for_argsort
which callsnp.array
does the same thing asnp.asanyarray
innargsort
. But the code inExtensionArray.argsort
is organized in more general way.DatetimeLikeArray
andPeriodArray
's own_values_for_argsort
methods are deleted. Otherwise, NA values inPeriodArray
would get a negative number fromself._data
which would be placed in the beginning innargsort
.