From 1c11b7202d94337f8dee5f627e660be1f0b0fae0 Mon Sep 17 00:00:00 2001
From: gfyoung <gfyoung17@gmail.com>
Date: Mon, 4 Dec 2017 02:06:42 -0800
Subject: [PATCH 1/2] BUG: Don't overflow in DataFrame init with uint

For integers larger than the maximum uint64 value
(2**64 - 1), the DataFrame constructor now falls back
to the object dtype instead of raising OverflowError.

Closes gh-18584.
---
 doc/source/whatsnew/v0.22.0.txt         |  3 +--
 pandas/_libs/src/inference.pyx          | 11 +++++++++--
 pandas/tests/dtypes/test_inference.py   |  6 ++++++
 pandas/tests/frame/test_constructors.py | 10 ++++++++++
 4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index 495d0beaf3faa..3ca1417a108f5 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -186,7 +186,7 @@ Conversion
 ^^^^^^^^^^
 
 - Bug in :class:`Index` constructor with `dtype='uint64'` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
--
+- Bug in the :class:`DataFrame` constructor in which data containing very large positive numbers was causing ``OverflowError`` (:issue:`18584`)
 -
 
 Indexing
@@ -262,4 +262,3 @@ Other
 - Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values will result in a Series whose dtype is tz-aware instead of object (:issue:`16406`)
 - Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`)
 - Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`)
--
diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx
index cb192fcced318..75c892873573e 100644
--- a/pandas/_libs/src/inference.pyx
+++ b/pandas/_libs/src/inference.pyx
@@ -181,6 +181,13 @@ cdef class Seen(object):
         """
         Set flags indicating that an integer value was encountered.
 
+        In addition to setting a flag that an integer was seen, we
+        also set two flags depending on the type of integer seen:
+
+        1) sint_ : a negative (signed) number was encountered
+        2) uint_ : a positive number in the range of [2**63, 2**64)
+                   was encountered
+
         Parameters
         ----------
         val : Python int
@@ -188,7 +195,7 @@ cdef class Seen(object):
         """
         self.int_ = 1
         self.sint_ = self.sint_ or (val < 0)
-        self.uint_ = self.uint_ or (val > oINT64_MAX)
+        self.uint_ = self.uint_ or (oINT64_MAX < val <= oUINT64_MAX)
 
     @property
     def numeric_(self):
@@ -1263,7 +1270,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
             if not seen.null_:
                 seen.saw_int(int(val))
 
-                if seen.uint_ and seen.sint_:
+                if (seen.uint_ and seen.sint_) or val > oUINT64_MAX:
                     seen.object_ = 1
                     break
 
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index ef12416ef4e1c..21bb099d09261 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -388,6 +388,12 @@ def test_convert_numeric_int64_uint64(self, case, coerce):
         result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce)
         tm.assert_almost_equal(result, expected)
 
+    def test_convert_uint64_overflow(self):
+        # see gh-18584
+        arr = np.array([2**64], dtype=object)
+        result = lib.maybe_convert_objects(arr)
+        tm.assert_numpy_array_equal(arr, result)
+
     def test_maybe_convert_objects_uint64(self):
         # see gh-4471
         arr = np.array([2**63], dtype=object)
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 876e0ea7ea0b3..1409383829ac1 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -195,6 +195,16 @@ def test_constructor_overflow_int64(self):
         df_crawls = DataFrame(data)
         assert df_crawls['uid'].dtype == np.uint64
 
+    @pytest.mark.parametrize("values", [np.array([2**64], dtype=object),
+                                        np.array([2**64]), [2**64]])
+    def test_constructor_overflow_uint64(self, values):
+        # see gh-18584
+        value = values[0]
+        result = DataFrame(values)
+
+        assert result[0].dtype == object
+        assert result[0][0] == value
+
     def test_constructor_ordereddict(self):
         import random
         nitems = 100

From 9d5abd3e80299a5ab39fe0bf7617ec6d00a3bd24 Mon Sep 17 00:00:00 2001
From: gfyoung <gfyoung17@gmail.com>
Date: Tue, 5 Dec 2017 01:52:37 -0800
Subject: [PATCH 2/2] Don't overflow in DataFrame init with int

For integers smaller than the minimum int64 value
(-2**63), the DataFrame constructor now falls back
to the object dtype instead of raising OverflowError.
---
 doc/source/whatsnew/v0.22.0.txt         |  2 +-
 pandas/_libs/src/inference.pyx          | 12 +++++++-----
 pandas/tests/dtypes/test_inference.py   |  5 +++--
 pandas/tests/frame/test_constructors.py |  6 ++++--
 4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
index 3ca1417a108f5..f2500bb29d0be 100644
--- a/doc/source/whatsnew/v0.22.0.txt
+++ b/doc/source/whatsnew/v0.22.0.txt
@@ -186,7 +186,7 @@ Conversion
 ^^^^^^^^^^
 
 - Bug in :class:`Index` constructor with `dtype='uint64'` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
-- Bug in the :class:`DataFrame` constructor in which data containing very large positive numbers was causing ``OverflowError`` (:issue:`18584`)
+- Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`)
 -
 
 Indexing
diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx
index 75c892873573e..e15b4693432d9 100644
--- a/pandas/_libs/src/inference.pyx
+++ b/pandas/_libs/src/inference.pyx
@@ -184,9 +184,10 @@ cdef class Seen(object):
         In addition to setting a flag that an integer was seen, we
         also set two flags depending on the type of integer seen:
 
-        1) sint_ : a negative (signed) number was encountered
-        2) uint_ : a positive number in the range of [2**63, 2**64)
-                   was encountered
+        1) sint_ : a negative (signed) number in the
+                   range of [-2**63, 0) was encountered
+        2) uint_ : a positive number in the range of
+                   [2**63, 2**64) was encountered
 
         Parameters
         ----------
@@ -194,7 +195,7 @@ cdef class Seen(object):
             Value with which to set the flags.
         """
         self.int_ = 1
-        self.sint_ = self.sint_ or (val < 0)
+        self.sint_ = self.sint_ or (oINT64_MIN <= val < 0)
         self.uint_ = self.uint_ or (oINT64_MAX < val <= oUINT64_MAX)
 
     @property
@@ -1270,7 +1271,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
             if not seen.null_:
                 seen.saw_int(int(val))
 
-                if (seen.uint_ and seen.sint_) or val > oUINT64_MAX:
+                if ((seen.uint_ and seen.sint_) or
+                        val > oUINT64_MAX or val < oINT64_MIN):
                     seen.object_ = 1
                     break
 
diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
index 21bb099d09261..092bbb36169d4 100644
--- a/pandas/tests/dtypes/test_inference.py
+++ b/pandas/tests/dtypes/test_inference.py
@@ -388,9 +388,10 @@ def test_convert_numeric_int64_uint64(self, case, coerce):
         result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce)
         tm.assert_almost_equal(result, expected)
 
-    def test_convert_uint64_overflow(self):
+    @pytest.mark.parametrize("value", [-2**63 - 1, 2**64])
+    def test_convert_int_overflow(self, value):
         # see gh-18584
-        arr = np.array([2**64], dtype=object)
+        arr = np.array([value], dtype=object)
         result = lib.maybe_convert_objects(arr)
         tm.assert_numpy_array_equal(arr, result)
 
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
index 1409383829ac1..8fd196bfc4d2a 100644
--- a/pandas/tests/frame/test_constructors.py
+++ b/pandas/tests/frame/test_constructors.py
@@ -196,8 +196,10 @@ def test_constructor_overflow_int64(self):
         assert df_crawls['uid'].dtype == np.uint64
 
     @pytest.mark.parametrize("values", [np.array([2**64], dtype=object),
-                                        np.array([2**64]), [2**64]])
-    def test_constructor_overflow_uint64(self, values):
+                                        np.array([2**65]), [2**64 + 1],
+                                        np.array([-2**63 - 4], dtype=object),
+                                        np.array([-2**64 - 1]), [-2**65 - 2]])
+    def test_constructor_int_overflow(self, values):
         # see gh-18584
         value = values[0]
         result = DataFrame(values)