|
10 | 10 |
|
11 | 11 | from pandas.util.testing import (assert_series_equal,
|
12 | 12 | assert_frame_equal,
|
13 |
| - assertRaisesRegexp, |
14 |
| - is_sorted) |
| 13 | + assertRaisesRegexp) |
15 | 14 |
|
16 | 15 | import pandas.util.testing as tm
|
17 | 16 |
|
@@ -495,15 +494,53 @@ def test_frame_column_inplace_sort_exception(self):
|
495 | 494 |
|
496 | 495 | def test_sort_nat_values_in_int_column(self):
|
497 | 496 |
|
498 |
| - # GH 14922, sorting with large float and multiple columns incorrect |
| 497 | + # GH 14922: "sorting with large float and multiple columns incorrect" |
| 498 | + |
| 499 | + # The cause was that the int64 value of NaT was treated as "na", which is |
| 500 | + # only correct for datetime64 columns. |
| 501 | + |
499 | 502 | int_values = (2, int(NaT))
|
500 | 503 | float_values = (2.0, -1.797693e308)
|
501 | 504 |
|
502 | 505 | df = DataFrame(dict(int=int_values, float=float_values),
|
503 | 506 | columns=["int", "float"])
|
504 | 507 |
|
505 |
| - df_sorted = df.sort_values(["int", "float"]) |
506 |
| - df_expected = DataFrame(dict(int=int_values[::-1], float=float_values[::-1]), |
507 |
| - columns=["int", "float"], index=[1, 0]) |
| 508 | + df_reversed = DataFrame(dict(int=int_values[::-1], |
| 509 | + float=float_values[::-1]), |
| 510 | + columns=["int", "float"], |
| 511 | + index=[1, 0]) |
| 512 | + |
| 513 | + # NaT is not a "na" for int64 columns, so na_position must not |
| 514 | + # influence the result: |
| 515 | + df_sorted = df.sort_values(["int", "float"], na_position="last") |
| 516 | + assert_frame_equal(df_sorted, df_reversed) |
| 517 | + |
| 518 | + df_sorted = df.sort_values(["int", "float"], na_position="first") |
| 519 | + assert_frame_equal(df_sorted, df_reversed) |
| 520 | + |
| 521 | + # reverse sorting order |
| 522 | + df_sorted = df.sort_values(["int", "float"], ascending=False) |
| 523 | + assert_frame_equal(df_sorted, df) |
| 524 | + |
| 525 | + # and now check if NaT is still considered as "na" for datetime64 |
| 526 | + # columns: |
| 527 | + df = DataFrame(dict(int=int_values, float=float_values), |
| 528 | + columns=["int", "float"]) |
| 529 | + |
| 530 | + df = DataFrame(dict(datetime=[Timestamp("2016-01-01"), NaT], |
| 531 | + float=float_values), columns=["datetime", "float"]) |
| 532 | + |
| 533 | + # check if the dtype is datetime64[ns]: |
| 534 | + assert df["datetime"].dtypes == np.dtype("datetime64[ns]"),\ |
| 535 | + "this test function is not reliable anymore" |
| 536 | + |
| 537 | + df_reversed = DataFrame(dict(datetime=[NaT, Timestamp("2016-01-01")], |
| 538 | + float=float_values[::-1]), |
| 539 | + columns=["datetime", "float"], |
| 540 | + index=[1, 0]) |
| 541 | + |
| 542 | + df_sorted = df.sort_values(["datetime", "float"], na_position="first") |
| 543 | + assert_frame_equal(df_sorted, df_reversed) |
508 | 544 |
|
509 |
| - assert_frame_equal(df_sorted, df_expected) |
| 545 | + df_sorted = df.sort_values(["datetime", "float"], na_position="last") |
| 546 | + assert_frame_equal(df_sorted, df_reversed) |
0 commit comments