Skip to content

Commit a9921ba

Browse files
committed
COMPAT: Argsort position matches NumPy
Closes pandas-dev#29884
1 parent 6752833 commit a9921ba

File tree

3 files changed

+48
-7
lines changed

3 files changed

+48
-7
lines changed

doc/source/whatsnew/v1.0.0.rst

+38
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,44 @@ source, you should no longer need to install Cython into your build environment
246246
Backwards incompatible API changes
247247
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
248248

249+
.. _whatsnew_100.api_breaking.nat_sort:
250+
251+
Changed sort position for ``NaT``
252+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
253+
254+
:attr:`NaT` is now placed at the *end* rather than the beginning by sorting functions (:issue:`29884`).
255+
This matches the behavior in NumPy 1.18 and newer, which makes the ``NaT`` behavior consistent
256+
with other missing values like :attr:`numpy.nan`.
257+
258+
.. ipython:: python
259+
260+
values = pd.Index(['2001', 'NaT', '2000'], dtype='datetime64[ns]')
261+
262+
*pandas 0.25.x*
263+
264+
.. code-block:: python
265+
266+
>>> values.sort_values()
267+
DatetimeIndex(['NaT', '2000-01-01', '2001-01-01'], dtype='datetime64[ns]', freq=None)
268+
269+
>>> values.argsort()
270+
array([1, 2, 0])
271+
272+
273+
*pandas 1.0.0*
274+
275+
.. ipython:: python
276+
277+
values.sort_values()
278+
values.argsort()
279+
280+
This affects all sorting functions on indexes, Series, DataFrames, and arrays.
281+
282+
283+
.. note::
284+
285+
This change was made between pandas 1.0.0rc0 and pandas 1.0.0.
286+
249287
.. _whatsnew_100.api_breaking.MultiIndex._names:
250288

251289
Avoid using names from ``MultiIndex.levels``

pandas/core/indexes/datetimelike.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,9 @@ def map(self, mapper, na_action=None):
180180
except Exception:
181181
return self.astype(object).map(mapper)
182182

183+
def argsort(self, *args, **kwargs) -> np.ndarray:
184+
return np.argsort(self._data)
185+
183186
def sort_values(self, return_indexer=False, ascending=True):
184187
"""
185188
Return sorted copy of Index.
@@ -191,10 +194,7 @@ def sort_values(self, return_indexer=False, ascending=True):
191194
sorted_index = self.take(_as)
192195
return sorted_index, _as
193196
else:
194-
# NB: using asi8 instead of _ndarray_values matters in numpy 1.18
195-
# because the treatment of NaT has been changed to put NaT last
196-
# instead of first.
197-
sorted_values = np.sort(self.asi8)
197+
sorted_values = np.sort(self._ndarray_values)
198198

199199
freq = self.freq
200200
if freq is not None and not is_period_dtype(self):

pandas/tests/indexes/datetimes/test_ops.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def test_order_with_freq(self, idx):
247247
),
248248
(
249249
[pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
250-
[pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
250+
["2011-01-02", "2011-01-03", "2011-01-05", pd.NaT, pd.NaT],
251251
),
252252
],
253253
)
@@ -269,14 +269,17 @@ def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture)
269269
ordered, indexer = index.sort_values(return_indexer=True)
270270
tm.assert_index_equal(ordered, expected)
271271

272-
exp = np.array([0, 4, 3, 1, 2])
272+
if index.isna().any():
273+
exp = np.array([3, 1, 2, 0, 4])
274+
else:
275+
exp = np.array([0, 4, 3, 1, 2])
273276
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
274277
assert ordered.freq is None
275278

276279
ordered, indexer = index.sort_values(return_indexer=True, ascending=False)
277280
tm.assert_index_equal(ordered, expected[::-1])
278281

279-
exp = np.array([2, 1, 3, 4, 0])
282+
exp = exp[::-1]
280283
tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
281284
assert ordered.freq is None
282285

0 commit comments

Comments
 (0)