From d6ae52a523902271ecedb8d141208b06f0fb52b9 Mon Sep 17 00:00:00 2001
From: Evan Wright <evanpw@gmail.com>
Date: Fri, 23 Oct 2015 16:11:27 -0400
Subject: [PATCH 1/2] BUG: drop_duplicates drops non-duplicate rows in the
 presence of integer columns (GH 11376)

---
 doc/source/whatsnew/v0.17.1.txt |  2 +-
 pandas/tests/test_frame.py      | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt
index 2bb9920b6f177..70226ca302a60 100755
--- a/doc/source/whatsnew/v0.17.1.txt
+++ b/doc/source/whatsnew/v0.17.1.txt
@@ -92,7 +92,7 @@ Bug Fixes
 - Bug in ``pivot_table`` with ``margins=True`` when indexes are of ``Categorical`` dtype (:issue:`10993`)
 - Bug in ``DataFrame.plot`` cannot use hex strings colors (:issue:`10299`)
 
-
+- Bug in ``DataFrame.drop_duplicates`` (regression from 0.16.2) causing some non-duplicate rows containing integer values to be dropped (:issue:`11376`)
 
 
 - Bug in ``pd.eval`` where unary ops in a list error (:issue:`11235`)
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 5c7f1ec9e0037..dfbd21997568d 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -8380,6 +8380,25 @@ def test_drop_duplicates(self):
         expected = df.iloc[[-2,-1]]
         assert_frame_equal(result, expected)
 
+        # GH 11376
+        df = pd.DataFrame({'x': [7, 6, 3, 3, 4, 8, 0],
+                           'y': [0, 6, 5, 5, 9, 1, 2]})
+        expected = df.loc[df.index != 3]
+        assert_frame_equal(df.drop_duplicates(), expected)
+
+        df = pd.DataFrame([[1 , 0], [0, 2]])
+        assert_frame_equal(df.drop_duplicates(), df)
+
+        df = pd.DataFrame([[-2, 0], [0, -4]])
+        assert_frame_equal(df.drop_duplicates(), df)
+
+        x = np.iinfo(np.int64).max / 3 * 2
+        df = pd.DataFrame([[-x, x], [0, x + 4]])
+        assert_frame_equal(df.drop_duplicates(), df)
+
+        df = pd.DataFrame([[-x, x], [x, x + 4]])
+        assert_frame_equal(df.drop_duplicates(), df)
+
     def test_drop_duplicates_for_take_all(self):
         df = DataFrame({'AAA': ['foo', 'bar', 'baz', 'bar',
                                 'foo', 'bar', 'qux', 'foo'],

From b7107283df30a7c45dbc30347d06c3bbda7f05f3 Mon Sep 17 00:00:00 2001
From: Evan Wright <evanpw@gmail.com>
Date: Fri, 23 Oct 2015 16:42:28 -0400
Subject: [PATCH 2/2] Revert "PERF: perf improvements in drop_duplicates for
 integer dtyped arrays"

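This reverts commit a00c7ea1e2b6be5754a0461915cc48b007771b34, but keeps
the new tests and benchmark.

The reverted fast path used the raw values of integer columns as labels
instead of factorizing them, which led to non-duplicate rows being
dropped (GH 11376). All dtypes go through ``factorize`` again.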
---
 pandas/core/frame.py | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 31b7aacefcb60..4774fc4f17a91 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2994,13 +2994,7 @@ def duplicated(self, subset=None, keep='first'):
         from pandas.hashtable import duplicated_int64, _SIZE_HINT_LIMIT
 
         def f(vals):
-
-            # if we have integers we can directly index with these
-            if com.is_integer_dtype(vals):
-                from pandas.core.nanops import unique1d
-                labels, shape = vals, unique1d(vals)
-            else:
-                labels, shape = factorize(vals, size_hint=min(len(self), _SIZE_HINT_LIMIT))
+            labels, shape = factorize(vals, size_hint=min(len(self), _SIZE_HINT_LIMIT))
             return labels.astype('i8',copy=False), len(shape)
 
         if subset is None: