pandas-dev · jorisvandenbossche · Dec 5, 2017 · Dec 4, 2017 · Dec 5, 2017 · jreback
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -186,7 +186,7 @@ Conversion
 ^^^^^^^^^^
 
 - Bug in :class:`Index` constructor with `dtype='uint64'` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
--
+- Bug in the :class:`DataFrame` constructor in which data containing integers greater than the ``uint64`` maximum or less than the ``int64`` minimum raised ``OverflowError`` (:issue:`18584`)
 -
 
 Indexing
@@ -262,4 +262,3 @@ Other
 - Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values will result in a Series whose dtype is tz-aware instead of object (:issue:`16406`)
 - Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`)
 - Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`)
--
diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx
@@ -181,14 +181,22 @@ cdef class Seen(object):
         """
         Set flags indicating that an integer value was encountered.
 
+        In addition to setting a flag that an integer was seen, we
+        also set two flags depending on the type of integer seen:
+
+        1) sint_ : a negative (signed) number in the
+                   range of [-2**63, 0) was encountered
+        2) uint_ : a positive number in the range of
+                   [2**63, 2**64) was encountered
+
         Parameters
         ----------
         val : Python int
             Value with which to set the flags.
         """
         self.int_ = 1
-        self.sint_ = self.sint_ or (val < 0)
-        self.uint_ = self.uint_ or (val > oINT64_MAX)
+        self.sint_ = self.sint_ or (oINT64_MIN <= val < 0)
+        self.uint_ = self.uint_ or (oINT64_MAX < val <= oUINT64_MAX)
 
     @property
     def numeric_(self):
@@ -1263,7 +1271,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
             if not seen.null_:
                 seen.saw_int(int(val))
 
-                if seen.uint_ and seen.sint_:
+                if ((seen.uint_ and seen.sint_) or
+                        val > oUINT64_MAX or val < oINT64_MIN):
                     seen.object_ = 1
                     break
 

diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py
@@ -388,6 +388,13 @@ def test_convert_numeric_int64_uint64(self, case, coerce):
         result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce)
         tm.assert_almost_equal(result, expected)
 
+    @pytest.mark.parametrize("value", [-2**63 - 1, 2**64])
+    def test_convert_int_overflow(self, value):
+        # see gh-18584: ints outside [-2**63, 2**64) must stay object dtype
+        arr = np.array([value], dtype=object)
+        result = lib.maybe_convert_objects(arr)
+        tm.assert_numpy_array_equal(arr, result)
+
     def test_maybe_convert_objects_uint64(self):
         # see gh-4471
         arr = np.array([2**63], dtype=object)

diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -195,6 +195,18 @@ def test_constructor_overflow_int64(self):
         df_crawls = DataFrame(data)
         assert df_crawls['uid'].dtype == np.uint64
 
+    @pytest.mark.parametrize("values", [np.array([2**64], dtype=object),
+                                        np.array([2**65]), [2**64 + 1],
+                                        np.array([-2**63 - 4], dtype=object),
+                                        np.array([-2**64 - 1]), [-2**65 - 2]])
+    def test_constructor_int_overflow(self, values):
+        # see gh-18584: out-of-range ints must be kept as object dtype
+        value = values[0]
+        result = DataFrame(values)
+
+        assert result[0].dtype == object
+        assert result[0][0] == value
+
     def test_constructor_ordereddict(self):
         import random
         nitems = 100