Skip to content

Commit ad7d051

Browse files
Jean-Mathieu Deschenes, jreback
Jean-Mathieu Deschenes authored and jreback committed
BUG: DataFrame sort_values and multiple "by" columns fails to order NaT correctly
closes #16836

Author: Jean-Mathieu Deschenes <[email protected]>

This patch had conflicts when merged, resolved by
Committer: Jeff Reback <[email protected]>

Closes #16995 from jdeschenes/datetime_sort_issues and squashes the following commits:

257e10a [Jean-Mathieu Deschenes] Changes requested by @jreback
c6d55e2 [Jean-Mathieu Deschenes] Fix for #16836
1 parent 54f6648 commit ad7d051

File tree

3 files changed

+30
-7
lines changed

3 files changed

+30
-7
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,7 @@ Reshaping
648648
- :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`).
649649
- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`)
650650
- Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`)
651+
- Fixed a regression in :func:`DataFrame.sort_values` where sorting by multiple columns ordered ``NaT`` values incorrectly when a ``datetime64`` dtype column contained ``NaT`` (:issue:`16836`)
651652

652653
Numeric
653654
^^^^^^^

pandas/core/frame.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -3453,18 +3453,13 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False,
34533453
if len(by) > 1:
34543454
from pandas.core.sorting import lexsort_indexer
34553455

3456-
def trans(v):
3457-
if needs_i8_conversion(v):
3458-
return v.view('i8')
3459-
return v
3460-
34613456
keys = []
34623457
for x in by:
34633458
k = self.xs(x, axis=other_axis).values
34643459
if k.ndim == 2:
34653460
raise ValueError('Cannot sort by duplicate column %s' %
34663461
str(x))
3467-
keys.append(trans(k))
3462+
keys.append(k)
34683463
indexer = lexsort_indexer(keys, orders=ascending,
34693464
na_position=na_position)
34703465
indexer = _ensure_platform_int(indexer)

pandas/tests/frame/test_sorting.py

+28-1
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,11 @@ def test_sort_datetimes(self):
269269
df2 = df.sort_values(by=['B'])
270270
assert_frame_equal(df1, df2)
271271

272+
df1 = df.sort_values(by='B')
273+
274+
df2 = df.sort_values(by=['C', 'B'])
275+
assert_frame_equal(df1, df2)
276+
272277
def test_frame_column_inplace_sort_exception(self):
273278
s = self.frame['A']
274279
with tm.assert_raises_regex(ValueError, "This Series is a view"):
@@ -321,7 +326,29 @@ def test_sort_nat_values_in_int_column(self):
321326
assert_frame_equal(df_sorted, df_reversed)
322327

323328
df_sorted = df.sort_values(["datetime", "float"], na_position="last")
324-
assert_frame_equal(df_sorted, df_reversed)
329+
assert_frame_equal(df_sorted, df)
330+
331+
# Ascending should not affect the results.
332+
df_sorted = df.sort_values(["datetime", "float"], ascending=False)
333+
assert_frame_equal(df_sorted, df)
334+
335+
def test_sort_nat(self):
336+
337+
# GH 16836
338+
339+
d1 = [Timestamp(x) for x in ['2016-01-01', '2015-01-01',
340+
np.nan, '2016-01-01']]
341+
d2 = [Timestamp(x) for x in ['2017-01-01', '2014-01-01',
342+
'2016-01-01', '2015-01-01']]
343+
df = pd.DataFrame({'a': d1, 'b': d2}, index=[0, 1, 2, 3])
344+
345+
d3 = [Timestamp(x) for x in ['2015-01-01', '2016-01-01',
346+
'2016-01-01', np.nan]]
347+
d4 = [Timestamp(x) for x in ['2014-01-01', '2015-01-01',
348+
'2017-01-01', '2016-01-01']]
349+
expected = pd.DataFrame({'a': d3, 'b': d4}, index=[1, 3, 0, 2])
350+
sorted_df = df.sort_values(by=['a', 'b'], )
351+
tm.assert_frame_equal(sorted_df, expected)
325352

326353

327354
class TestDataFrameSortIndexKinds(TestData):

0 commit comments

Comments
 (0)