|
6 | 6 |
|
7 | 7 | from pandas.compat import lrange
|
8 | 8 | from pandas import (DataFrame, Series, MultiIndex, Timestamp,
|
9 |
| - date_range) |
| 9 | + date_range, NaT) |
10 | 10 |
|
11 | 11 | from pandas.util.testing import (assert_series_equal,
|
12 | 12 | assert_frame_equal,
|
@@ -491,3 +491,49 @@ def test_frame_column_inplace_sort_exception(self):
|
491 | 491 |
|
492 | 492 | cp = s.copy()
|
493 | 493 | cp.sort_values() # it works!
|
| 494 | + |
| 495 | + def test_sort_nat_values_in_int_column(self): |
| 496 | + |
| 497 | + # GH 14922: "sorting with large float and multiple columns incorrect" |
| 498 | + |
| 499 | + # cause was that the int64 value NaT was considered as "na". Which is |
| 500 | + # only correct for datetime64 columns. |
| 501 | + |
| 502 | + int_values = (2, int(NaT)) |
| 503 | + float_values = (2.0, -1.797693e308) |
| 504 | + |
| 505 | + df = DataFrame(dict(int=int_values, float=float_values), |
| 506 | + columns=["int", "float"]) |
| 507 | + |
| 508 | + df_reversed = DataFrame(dict(int=int_values[::-1], |
| 509 | + float=float_values[::-1]), |
| 510 | + columns=["int", "float"], |
| 511 | + index=[1, 0]) |
| 512 | + |
| 513 | + # NaT is not a "na" for int64 columns, so na_position must not |
| 514 | + # influence the result: |
| 515 | + df_sorted = df.sort_values(["int", "float"], na_position="last") |
| 516 | + assert_frame_equal(df_sorted, df_reversed) |
| 517 | + |
| 518 | + df_sorted = df.sort_values(["int", "float"], na_position="first") |
| 519 | + assert_frame_equal(df_sorted, df_reversed) |
| 520 | + |
| 521 | + # reverse sorting order |
| 522 | + df_sorted = df.sort_values(["int", "float"], ascending=False) |
| 523 | + assert_frame_equal(df_sorted, df) |
| 524 | + |
| 525 | + # and now check if NaT is still considered as "na" for datetime64 |
| 526 | + # columns: |
| 527 | + df = DataFrame(dict(datetime=[Timestamp("2016-01-01"), NaT], |
| 528 | + float=float_values), columns=["datetime", "float"]) |
| 529 | + |
| 530 | + df_reversed = DataFrame(dict(datetime=[NaT, Timestamp("2016-01-01")], |
| 531 | + float=float_values[::-1]), |
| 532 | + columns=["datetime", "float"], |
| 533 | + index=[1, 0]) |
| 534 | + |
| 535 | + df_sorted = df.sort_values(["datetime", "float"], na_position="first") |
| 536 | + assert_frame_equal(df_sorted, df_reversed) |
| 537 | + |
| 538 | + df_sorted = df.sort_values(["datetime", "float"], na_position="last") |
| 539 | + assert_frame_equal(df_sorted, df_reversed) |
0 commit comments