Skip to content

Commit 21e610c

Browse files
committed
extended tests + minor cleanup
1 parent 358a31e commit 21e610c

File tree

1 file changed

+44
-7
lines changed

1 file changed

+44
-7
lines changed

pandas/tests/frame/test_sorting.py

+44-7
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@
1010

1111
from pandas.util.testing import (assert_series_equal,
1212
assert_frame_equal,
13-
assertRaisesRegexp,
14-
is_sorted)
13+
assertRaisesRegexp)
1514

1615
import pandas.util.testing as tm
1716

@@ -495,15 +494,53 @@ def test_frame_column_inplace_sort_exception(self):
495494

496495
def test_sort_nat_values_in_int_column(self):
497496

498-
# GH 14922, sorting with large float and multiple columns incorrect
497+
# GH 14922: "sorting with large float and multiple columns incorrect"
498+
499+
# The cause was that the int64 value of NaT was treated as "na",
500+
# which is only correct for datetime64 columns.
501+
499502
int_values = (2, int(NaT))
500503
float_values = (2.0, -1.797693e308)
501504

502505
df = DataFrame(dict(int=int_values, float=float_values),
503506
columns=["int", "float"])
504507

505-
df_sorted = df.sort_values(["int", "float"])
506-
df_expected = DataFrame(dict(int=int_values[::-1], float=float_values[::-1]),
507-
columns=["int", "float"], index=[1, 0])
508+
df_reversed = DataFrame(dict(int=int_values[::-1],
509+
float=float_values[::-1]),
510+
columns=["int", "float"],
511+
index=[1, 0])
512+
513+
# NaT is not a "na" for int64 columns, so na_position must not
514+
# influence the result:
515+
df_sorted = df.sort_values(["int", "float"], na_position="last")
516+
assert_frame_equal(df_sorted, df_reversed)
517+
518+
df_sorted = df.sort_values(["int", "float"], na_position="first")
519+
assert_frame_equal(df_sorted, df_reversed)
520+
521+
# reverse sorting order
522+
df_sorted = df.sort_values(["int", "float"], ascending=False)
523+
assert_frame_equal(df_sorted, df)
524+
525+
# and now check if NaT is still considered as "na" for datetime64
526+
# columns:
527+
df = DataFrame(dict(int=int_values, float=float_values),
528+
columns=["int", "float"])
529+
530+
df = DataFrame(dict(datetime=[Timestamp("2016-01-01"), NaT],
531+
float=float_values), columns=["datetime", "float"])
532+
533+
# check if the dtype is datetime64[ns]:
534+
assert df["datetime"].dtypes == np.dtype("datetime64[ns]"),\
535+
"this test function is not reliable anymore"
536+
537+
df_reversed = DataFrame(dict(datetime=[NaT, Timestamp("2016-01-01")],
538+
float=float_values[::-1]),
539+
columns=["datetime", "float"],
540+
index=[1, 0])
541+
542+
df_sorted = df.sort_values(["datetime", "float"], na_position="first")
543+
assert_frame_equal(df_sorted, df_reversed)
508544

509-
assert_frame_equal(df_sorted, df_expected)
545+
df_sorted = df.sort_values(["datetime", "float"], na_position="last")
546+
assert_frame_equal(df_sorted, df_reversed)

0 commit comments

Comments
 (0)