Skip to content

Commit 5f5d332

Browse files
committed
BUG: Don't overflow in DataFrame init with uint
For integers larger than what uint64 can handle, we gracefully default to the object dtype instead of overflowing. Closes gh-18584.
1 parent 52fefd5 commit 5f5d332

File tree

4 files changed

+24
-4
lines changed

4 files changed

+24
-4
lines changed

doc/source/whatsnew/v0.22.0.txt

+1-2
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ Conversion
185185
^^^^^^^^^^
186186

187187
- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
188-
-
188+
- Bug in the :class:`DataFrame` constructor in which data containing integers too large to fit in ``uint64`` raised ``OverflowError`` instead of falling back to ``object`` dtype (:issue:`18584`)
189189
-
190190

191191
Indexing
@@ -261,4 +261,3 @@ Other
261261
- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values will result in a Series whose dtype is tz-aware instead of object (:issue:`16406`)
262262
- Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`)
263263
- Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`)
264-
-

pandas/_libs/src/inference.pyx

+9-2
Original file line numberDiff line numberDiff line change
@@ -181,14 +181,21 @@ cdef class Seen(object):
181181
"""
182182
Set flags indicating that an integer value was encountered.
183183
184+
In addition to setting a flag that an integer was seen, we
185+
also set two flags depending on the type of integer seen:
186+
187+
1) sint_ : a negative (signed) number was encountered
188+
2) uint_ : a positive number in the range of [2**63, 2**64)
189+
was encountered
190+
184191
Parameters
185192
----------
186193
val : Python int
187194
Value with which to set the flags.
188195
"""
189196
self.int_ = 1
190197
self.sint_ = self.sint_ or (val < 0)
191-
self.uint_ = self.uint_ or (val > oINT64_MAX)
198+
self.uint_ = self.uint_ or (oINT64_MAX < val <= oUINT64_MAX)
192199

193200
@property
194201
def numeric_(self):
@@ -1263,7 +1270,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
12631270
if not seen.null_:
12641271
seen.saw_int(int(val))
12651272

1266-
if seen.uint_ and seen.sint_:
1273+
if (seen.uint_ and seen.sint_) or val > oUINT64_MAX:
12671274
seen.object_ = 1
12681275
break
12691276

pandas/tests/dtypes/test_inference.py

+6
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,12 @@ def test_convert_numeric_int64_uint64(self, case, coerce):
388388
result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce)
389389
tm.assert_almost_equal(result, expected)
390390

391+
def test_convert_uint64_overflow(self):
392+
# see gh-18584
393+
arr = np.array([2**64], dtype=object)
394+
result = lib.maybe_convert_objects(arr)
395+
tm.assert_numpy_array_equal(arr, result)
396+
391397
def test_maybe_convert_objects_uint64(self):
392398
# see gh-4471
393399
arr = np.array([2**63], dtype=object)

pandas/tests/frame/test_constructors.py

+8
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,14 @@ def test_constructor_overflow_int64(self):
195195
df_crawls = DataFrame(data)
196196
assert df_crawls['uid'].dtype == np.uint64
197197

198+
def test_constructor_overflow_uint64(self):
199+
# see gh-18584
200+
values = np.array([2**64], dtype=object)
201+
result = DataFrame(values)
202+
203+
assert result[0].dtype == object
204+
assert result[0][0] == 2**64
205+
198206
def test_constructor_ordereddict(self):
199207
import random
200208
nitems = 100

0 commit comments

Comments
 (0)