@@ -3286,6 +3286,59 @@ def test_drop_duplicates(self):
3286
3286
expected = df2 .drop_duplicates (['A' , 'B' ], take_last = True )
3287
3287
assert_frame_equal (result , expected )
3288
3288
3289
def test_drop_duplicates_NA(self):
    """Check ``DataFrame.drop_duplicates`` when the key columns hold missing values.

    Two fixtures are exercised: one whose string column ``A`` contains
    ``None`` and one whose float column ``C`` contains ``np.nan``.  For each
    fixture the test covers a single key column and a pair of key columns,
    both with the default (keep first occurrence) and with ``take_last=True``
    (keep last occurrence).  The expected frames encode that repeated
    missing values in a key are collapsed like any other repeated value.
    """
    # none
    df = DataFrame({'A': [None, None, 'foo', 'bar',
                          'foo', 'bar', 'bar', 'foo'],
                    'B': ['one', 'one', 'two', 'two',
                          'two', 'two', 'one', 'two'],
                    'C': [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.],
                    # list(...) keeps the column a concrete list on
                    # interpreters where range() is lazy.
                    'D': list(range(8))})

    # single column
    # Rows 0 and 1 both hold None in 'A'; only the first survives.
    result = df.drop_duplicates('A')
    expected = df.ix[[0, 2, 3]]
    assert_frame_equal(result, expected)

    # With take_last the final occurrence of each key is kept instead.
    result = df.drop_duplicates('A', take_last=True)
    expected = df.ix[[1, 6, 7]]
    assert_frame_equal(result, expected)

    # multi column
    # (None, 'one') appears at rows 0 and 1 and counts as one key.
    result = df.drop_duplicates(['A', 'B'])
    expected = df.ix[[0, 2, 3, 6]]
    assert_frame_equal(result, expected)

    result = df.drop_duplicates(['A', 'B'], take_last=True)
    expected = df.ix[[1, 5, 6, 7]]
    assert_frame_equal(result, expected)

    # nan
    df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                          'foo', 'bar', 'bar', 'foo'],
                    'B': ['one', 'one', 'two', 'two',
                          'two', 'two', 'one', 'two'],
                    'C': [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.],
                    'D': list(range(8))})

    # single column
    # 'C' has only two distinct keys: 1.0 (row 0) and NaN (row 1).
    result = df.drop_duplicates('C')
    expected = df[:2]
    assert_frame_equal(result, expected)

    # Last NaN is row 3, last 1.0 is row 7.
    result = df.drop_duplicates('C', take_last=True)
    expected = df.ix[[3, 7]]
    assert_frame_equal(result, expected)

    # multi column
    result = df.drop_duplicates(['C', 'B'])
    expected = df.ix[[0, 1, 2, 4]]
    assert_frame_equal(result, expected)

    result = df.drop_duplicates(['C', 'B'], take_last=True)
    expected = df.ix[[1, 3, 6, 7]]
    assert_frame_equal(result, expected)
3341
+
3289
3342
def test_drop_col_still_multiindex (self ):
3290
3343
arrays = [[ 'a' , 'b' , 'c' , 'top' ],
3291
3344
[ '' , '' , '' , 'OD' ],
0 commit comments