Skip to content

Commit 9d5abd3

Browse files
committed
Don't overflow in DataFrame init with int
For integers smaller than the minimum value int64 can hold (-2**63), we now gracefully fall back to the object dtype instead of raising an OverflowError.
1 parent 1c11b72 commit 9d5abd3

File tree

4 files changed

+15
-10
lines changed

4 files changed

+15
-10
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ Conversion
186186
^^^^^^^^^^
187187

188188
- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
189-
- Bug in the :class:`DataFrame` constructor in which data containing very large positive numbers was causing ``OverflowError`` (:issue:`18584`)
189+
- Bug in the :class:`DataFrame` constructor in which data containing integers outside the 64-bit range (greater than ``2**64 - 1`` or less than ``-2**63``) was causing ``OverflowError`` (:issue:`18584`)
190190
-
191191

192192
Indexing

pandas/_libs/src/inference.pyx

+7-5
Original file line numberDiff line numberDiff line change
@@ -184,17 +184,18 @@ cdef class Seen(object):
184184
In addition to setting a flag that an integer was seen, we
185185
also set two flags depending on the type of integer seen:
186186
187-
1) sint_ : a negative (signed) number was encountered
188-
2) uint_ : a positive number in the range of [2**63, 2**64)
189-
was encountered
187+
1) sint_ : a negative (signed) number in the
188+
range of [-2**63, 0) was encountered
189+
2) uint_ : a positive number in the range of
190+
[2**63, 2**64) was encountered
190191
191192
Parameters
192193
----------
193194
val : Python int
194195
Value with which to set the flags.
195196
"""
196197
self.int_ = 1
197-
self.sint_ = self.sint_ or (val < 0)
198+
self.sint_ = self.sint_ or (oINT64_MIN <= val < 0)
198199
self.uint_ = self.uint_ or (oINT64_MAX < val <= oUINT64_MAX)
199200

200201
@property
@@ -1270,7 +1271,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
12701271
if not seen.null_:
12711272
seen.saw_int(int(val))
12721273

1273-
if (seen.uint_ and seen.sint_) or val > oUINT64_MAX:
1274+
if ((seen.uint_ and seen.sint_) or
1275+
val > oUINT64_MAX or val < oINT64_MIN):
12741276
seen.object_ = 1
12751277
break
12761278

pandas/tests/dtypes/test_inference.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -388,9 +388,10 @@ def test_convert_numeric_int64_uint64(self, case, coerce):
388388
result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce)
389389
tm.assert_almost_equal(result, expected)
390390

391-
def test_convert_uint64_overflow(self):
391+
@pytest.mark.parametrize("value", [-2**63 - 1, 2**64])
392+
def test_convert_int_overflow(self, value):
392393
# see gh-18584
393-
arr = np.array([2**64], dtype=object)
394+
arr = np.array([value], dtype=object)
394395
result = lib.maybe_convert_objects(arr)
395396
tm.assert_numpy_array_equal(arr, result)
396397

pandas/tests/frame/test_constructors.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,10 @@ def test_constructor_overflow_int64(self):
196196
assert df_crawls['uid'].dtype == np.uint64
197197

198198
@pytest.mark.parametrize("values", [np.array([2**64], dtype=object),
199-
np.array([2**64]), [2**64]])
200-
def test_constructor_overflow_uint64(self, values):
199+
np.array([2**65]), [2**64 + 1],
200+
np.array([-2**63 - 4], dtype=object),
201+
np.array([-2**64 - 1]), [-2**65 - 2]])
202+
def test_constructor_int_overflow(self, values):
201203
# see gh-18584
202204
value = values[0]
203205
result = DataFrame(values)

0 commit comments

Comments
 (0)