Skip to content

Commit f9c8991

Browse files
committed
BUG: Don't overflow in DataFrame init with uint
For integers larger than what uint64 can handle, we gracefully default to the object dtype instead of overflowing. Closes gh-18584.
1 parent 52fefd5 commit f9c8991

File tree

4 files changed

+17
-3
lines changed

4 files changed

+17
-3
lines changed

doc/source/whatsnew/v0.22.0.txt

+1 -1
Original file line number | Diff line number | Diff line change
@@ -261,4 +261,4 @@ Other
261261
- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values will result in a Series whose dtype is tz-aware instead of object (:issue:`16406`)
262262
- Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`)
263263
- Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`)
264-
-
264+
- Bug in the :class:`DataFrame` constructor in which data containing very large positive numbers was causing ``OverflowError`` (:issue:`18584`)

pandas/_libs/src/inference.pyx

+2 -2
Original file line number | Diff line number | Diff line change
@@ -188,7 +188,7 @@ cdef class Seen(object):
188188
"""
189189
self.int_ = 1
190190
self.sint_ = self.sint_ or (val < 0)
191-
self.uint_ = self.uint_ or (val > oINT64_MAX)
191+
self.uint_ = self.uint_ or (oINT64_MAX < val <= oUINT64_MAX)
192192

193193
@property
194194
def numeric_(self):
@@ -1263,7 +1263,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
12631263
if not seen.null_:
12641264
seen.saw_int(int(val))
12651265

1266-
if seen.uint_ and seen.sint_:
1266+
if (seen.uint_ and seen.sint_) or val > oUINT64_MAX:
12671267
seen.object_ = 1
12681268
break
12691269

pandas/tests/dtypes/test_inference.py

+6
Original file line number | Diff line number | Diff line change
@@ -388,6 +388,12 @@ def test_convert_numeric_int64_uint64(self, case, coerce):
388388
result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce)
389389
tm.assert_almost_equal(result, expected)
390390

391+
def test_convert_uint64_overflow(self):
392+
# see gh-18584
393+
arr = np.array([2**64], dtype=object)
394+
result = lib.maybe_convert_objects(arr)
395+
tm.assert_numpy_array_equal(arr, result)
396+
391397
def test_maybe_convert_objects_uint64(self):
392398
# see gh-4471
393399
arr = np.array([2**63], dtype=object)

pandas/tests/frame/test_constructors.py

+8
Original file line number | Diff line number | Diff line change
@@ -195,6 +195,14 @@ def test_constructor_overflow_int64(self):
195195
df_crawls = DataFrame(data)
196196
assert df_crawls['uid'].dtype == np.uint64
197197

198+
def test_constructor_overflow_uint64(self):
199+
# see gh-18584
200+
values = np.array([2**64], dtype=object)
201+
result = DataFrame(values)
202+
203+
assert result[0].dtype == object
204+
assert result[0][0] == 2**64
205+
198206
def test_constructor_ordereddict(self):
199207
import random
200208
nitems = 100

0 commit comments

Comments
 (0)