diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 2259eb7d89534..668b0c2b5c3b7 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -222,6 +222,7 @@ Sparse Reshaping ^^^^^^^^^ - Joining/Merging with a non unique ``PeriodIndex`` raised a TypeError (:issue:`16871`) +- Fixes regression when sorting by multiple columns on a ``datetime64`` dtype ``Series`` with ``NaT`` values (:issue:`16836`) Numeric diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9a79ca1d4eab1..85b3f27410508 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3336,18 +3336,13 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, if len(by) > 1: from pandas.core.sorting import lexsort_indexer - def trans(v): - if needs_i8_conversion(v): - return v.view('i8') - return v - keys = [] for x in by: k = self.xs(x, axis=other_axis).values if k.ndim == 2: raise ValueError('Cannot sort by duplicate column %s' % str(x)) - keys.append(trans(k)) + keys.append(k) indexer = lexsort_indexer(keys, orders=ascending, na_position=na_position) indexer = _ensure_platform_int(indexer) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 891c94b59074a..c356ed11e9328 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -269,6 +269,11 @@ def test_sort_datetimes(self): df2 = df.sort_values(by=['B']) assert_frame_equal(df1, df2) + df1 = df.sort_values(by='B') + + df2 = df.sort_values(by=['C', 'B']) + assert_frame_equal(df1, df2) + def test_frame_column_inplace_sort_exception(self): s = self.frame['A'] with tm.assert_raises_regex(ValueError, "This Series is a view"): @@ -321,7 +326,27 @@ def test_sort_nat_values_in_int_column(self): assert_frame_equal(df_sorted, df_reversed) df_sorted = df.sort_values(["datetime", "float"], na_position="last") - assert_frame_equal(df_sorted, df_reversed) + assert_frame_equal(df_sorted, df) + + # Ascending should not affect the results. + df_sorted = df.sort_values(["datetime", "float"], ascending=False) + assert_frame_equal(df_sorted, df) + + # GH 16836 + + d1 = [Timestamp(x) for x in ['2016-01-01', '2015-01-01', + np.nan, '2016-01-01']] + d2 = [Timestamp(x) for x in ['2017-01-01', '2014-01-01', + '2016-01-01', '2015-01-01']] + df = pd.DataFrame({'a': d1, 'b': d2}, index=[0, 1, 2, 3]) + + d3 = [Timestamp(x) for x in ['2015-01-01', '2016-01-01', + '2016-01-01', np.nan]] + d4 = [Timestamp(x) for x in ['2014-01-01', '2015-01-01', + '2017-01-01', '2016-01-01']] + expected = pd.DataFrame({'a': d3, 'b': d4}, index=[1, 3, 0, 2]) + sorted_df = df.sort_values(by=['a', 'b'], ) + tm.assert_frame_equal(sorted_df, expected) class TestDataFrameSortIndexKinds(TestData):