Skip to content

Commit 30cee48

Browse files
committed
Don't overflow in DataFrame init with int
For integers below the minimum value that int64 can represent (-2**63), we now gracefully fall back to the object dtype instead of raising an OverflowError.
1 parent fdc2b93 commit 30cee48

File tree

3 files changed

+12
-8
lines changed

3 files changed

+12
-8
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ Conversion
185185
^^^^^^^^^^
186186

187187
- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
188-
- Bug in the :class:`DataFrame` constructor in which data containing very large positive numbers was causing ``OverflowError`` (:issue:`18584`)
188+
- Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`)
189189
-
190190

191191
Indexing

pandas/_libs/src/inference.pyx

+7-5
Original file line numberDiff line numberDiff line change
@@ -184,17 +184,18 @@ cdef class Seen(object):
184184
In addition to setting a flag that an integer was seen, we
185185
also set two flags depending on the type of integer seen:
186186
187-
1) sint_ : a negative (signed) number was encountered
188-
2) uint_ : a positive number in the range of [2**63, 2**64)
189-
was encountered
187+
1) sint_ : a negative (signed) number in the
188+
range of [-2**63, 0) was encountered
189+
2) uint_ : a positive number in the range of
190+
[2**63, 2**64) was encountered
190191
191192
Parameters
192193
----------
193194
val : Python int
194195
Value with which to set the flags.
195196
"""
196197
self.int_ = 1
197-
self.sint_ = self.sint_ or (val < 0)
198+
self.sint_ = self.sint_ or (oINT64_MIN <= val < 0)
198199
self.uint_ = self.uint_ or (oINT64_MAX < val <= oUINT64_MAX)
199200

200201
@property
@@ -1270,7 +1271,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
12701271
if not seen.null_:
12711272
seen.saw_int(int(val))
12721273

1273-
if (seen.uint_ and seen.sint_) or val > oUINT64_MAX:
1274+
if ((seen.uint_ and seen.sint_) or
1275+
val > oUINT64_MAX or val < oINT64_MIN):
12741276
seen.object_ = 1
12751277
break
12761278

pandas/tests/frame/test_constructors.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -196,8 +196,10 @@ def test_constructor_overflow_int64(self):
196196
assert df_crawls['uid'].dtype == np.uint64
197197

198198
@pytest.mark.parametrize("values", [np.array([2**64], dtype=object),
199-
np.array([2**64]), [2**64]])
200-
def test_constructor_overflow_uint64(self, values):
199+
np.array([2**65]), [2**64 + 1],
200+
np.array([-2**63 - 4], dtype=object),
201+
np.array([-2**64 - 1]), [-2**65 - 2]])
202+
def test_constructor_int_overflow(self, values):
201203
# see gh-18584
202204
value = values[0]
203205
result = DataFrame(values)

0 commit comments

Comments
 (0)