-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: allow missing values in Index when calling Index.sort_values #35604
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 58 commits
076061b
3fa9351
333c6e4
8414fd0
fa12898
e2575c9
4293e22
275067e
098fe10
f0d4c2d
743eca6
9d17d7a
744e5c1
13fcd94
33b02da
73cc94c
153fb06
3481b81
9d92ddd
7ec9e7e
2d16b8d
8939040
066f0af
7e351cd
79ed8ee
62268e8
5cab3d4
30ac949
0ac0a3c
051c3e7
dfdee40
acf6170
4b43852
fb8f703
fc58c3f
1dfd4f2
65c001a
dd82cc1
a3855ad
481de59
c7e8240
ad29fff
6ae6e8a
213b13b
760bda1
d8f9061
2f8bec6
29d6849
8b81e55
5b2a0d3
a3310ba
e80bc9d
2007699
8fd5a77
2e39294
d78a9a2
5715c31
bfd2e9c
54a6e82
f60d2a8
af92fe8
6d33657
4935309
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -306,10 +306,17 @@ def nargsort( | |
non_nan_idx = idx[~mask] | ||
|
||
nan_idx = np.nonzero(mask)[0] | ||
|
||
if not ascending: | ||
non_nans = non_nans[::-1] | ||
non_nan_idx = non_nan_idx[::-1] | ||
indexer = non_nan_idx[non_nans.argsort(kind=kind)] | ||
try: | ||
indexer = non_nan_idx[non_nans.argsort(kind=kind)] | ||
except TypeError: | ||
# For compatibility with Series: fall back to quicksort | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we have a test that hits this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, this is a remnant from trying to bring |
||
# when mergesort is unavailable for object element type | ||
indexer = non_nan_idx[non_nans.argsort(kind="quicksort")] | ||
|
||
if not ascending: | ||
indexer = indexer[::-1] | ||
# Finally, place the NaNs at the end or the beginning according to | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -174,9 +174,6 @@ def _check_freq(index, expected_index): | |
|
||
ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) | ||
tm.assert_index_equal(ordered, expected[::-1]) | ||
|
||
exp = np.array([2, 1, 3, 4, 0]) | ||
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) | ||
_check_freq(ordered, idx) | ||
|
||
pidx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D") | ||
|
@@ -333,3 +330,16 @@ def test_freq_setter_deprecated(self): | |
# warning for setter | ||
with pytest.raises(AttributeError, match="can't set attribute"): | ||
idx.freq = pd.offsets.Day() | ||
|
||
|
||
@pytest.mark.xfail(reason="PeriodIndex.sort_values currently unstable") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. well its doesn't work :->, do we have an issue tracking this, pls add the refernce in the xfail (and if we don't pls create an issue) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Couldn't find an existing issue, so I opened #35922 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. kk add the issue number to the xfail message There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. though I think the comprehensive test below covers this? (not averse to having this as well, just asking) |
||
def test_order_incompat(): | ||
# GH 35584. The new implementation of sort_values for Index.sort_values | ||
# is stable when sorting in descending order. PeriodIndex.sort_values | ||
# currently isn't stable. xfail should be removed after | ||
# the implementations' behavior is synchronized (xref GH 35922) | ||
pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A") | ||
iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") | ||
ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False) | ||
ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False) | ||
tm.assert_numpy_array_equal(indexer1, indexer2) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,7 +13,14 @@ | |
from pandas.core.dtypes.common import is_period_dtype, needs_i8_conversion | ||
|
||
import pandas as pd | ||
from pandas import CategoricalIndex, MultiIndex, RangeIndex | ||
from pandas import ( | ||
CategoricalIndex, | ||
DatetimeIndex, | ||
MultiIndex, | ||
PeriodIndex, | ||
RangeIndex, | ||
TimedeltaIndex, | ||
) | ||
import pandas._testing as tm | ||
|
||
|
||
|
@@ -391,3 +398,40 @@ def test_astype_preserves_name(self, index, dtype): | |
assert result.names == index.names | ||
else: | ||
assert result.name == index.name | ||
|
||
|
||
@pytest.mark.parametrize("na_position", [None, "middle"]) | ||
def test_sort_values_invalid_na_position(index_with_missing, na_position): | ||
|
||
if type(index_with_missing) in [DatetimeIndex, PeriodIndex, TimedeltaIndex]: | ||
pytest.xfail("stable descending order sort not implemented") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. e.g. maybe just enumerate the PeriodIndex case here (do the others have an issue?) pls open if they don't (you can make a single issue with checkboxes) |
||
elif type(index_with_missing) in [CategoricalIndex, MultiIndex]: | ||
pytest.xfail("missing value sorting order not defined for index type") | ||
|
||
if na_position not in ["first", "last"]: | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
with pytest.raises( | ||
ValueError, match=f"invalid na_position: {na_position}", | ||
): | ||
index_with_missing.sort_values(na_position=na_position) | ||
|
||
|
||
@pytest.mark.parametrize("na_position", ["first", "last"]) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
def test_sort_values_with_missing(index_with_missing, na_position): | ||
# GH 35584. Test that sort_values works with missing values, | ||
# sort non-missing and place missing according to na_position | ||
|
||
if type(index_with_missing) in [DatetimeIndex, PeriodIndex, TimedeltaIndex]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jreback The tests in I've clarified the xfail messages everywhere and added comments with xrefs to the tests in Sorry if I misunderstood your question. |
||
pytest.xfail("stable descending order sort not implemented") | ||
elif type(index_with_missing) in [CategoricalIndex, MultiIndex]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you isinstance(....) instead There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
pytest.xfail("missing value sorting order not defined for index type") | ||
missing_count = np.sum(index_with_missing.isna()) | ||
jreback marked this conversation as resolved.
Show resolved
Hide resolved
|
||
not_na_vals = index_with_missing[index_with_missing.notna()].values | ||
sorted_values = np.sort(not_na_vals) | ||
if na_position == "first": | ||
sorted_values = np.concatenate([[None] * missing_count, sorted_values]) | ||
else: | ||
sorted_values = np.concatenate([sorted_values, [None] * missing_count]) | ||
expected = type(index_with_missing)(sorted_values) | ||
|
||
result = index_with_missing.sort_values(na_position=na_position) | ||
tm.assert_index_equal(result, expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you just check
request.parm
== 'multindex'? as its more explictThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.