Skip to content

Commit 0ac3d98

Browse files
gfyoung authored and jreback committed
BUG: Don't convert uint64 to object in DataFrame init (#14917)
The hack used to resolve gh-2355 is no longer needed. Removes the hack and patches several tests that relied on this hacky (and buggy) behavior. Closes gh-14881.
1 parent 39efbbc commit 0ac3d98

File tree

4 files changed

+9
-12
lines changed

4 files changed

+9
-12
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ Bug Fixes
243243
~~~~~~~~~
244244

245245
- Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`)
246+
- Bug in ``DataFrame`` construction in which unsigned 64-bit integer elements were being converted to objects (:issue:`14881`)
246247
- Bug in ``astype()`` where ``inf`` values were incorrectly converted to integers. Now raises an error with ``astype()`` for Series and DataFrames (:issue:`14265`)
247248

248249

pandas/core/internals.py

-5
Original file line numberDiff line numberDiff line change
@@ -4314,11 +4314,6 @@ def form_blocks(arrays, names, axes):
43144314
elif is_datetimetz(v):
43154315
datetime_tz_items.append((i, k, v))
43164316
elif issubclass(v.dtype.type, np.integer):
4317-
if v.dtype == np.uint64:
4318-
# HACK #2355 definite overflow
4319-
if (v > 2**63 - 1).any():
4320-
object_items.append((i, k, v))
4321-
continue
43224317
int_items.append((i, k, v))
43234318
elif v.dtype == np.bool_:
43244319
bool_items.append((i, k, v))

pandas/tests/frame/test_constructors.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -183,13 +183,14 @@ def test_constructor_bool(self):
183183
self.assertEqual(df.values.dtype, np.bool_)
184184

185185
def test_constructor_overflow_int64(self):
186+
# see gh-14881
186187
values = np.array([2 ** 64 - i for i in range(1, 10)],
187188
dtype=np.uint64)
188189

189190
result = DataFrame({'a': values})
190-
self.assertEqual(result['a'].dtype, object)
191+
self.assertEqual(result['a'].dtype, np.uint64)
191192

192-
# #2355
193+
# see gh-2355
193194
data_scores = [(6311132704823138710, 273), (2685045978526272070, 23),
194195
(8921811264899370420, 45),
195196
(long(17019687244989530680), 270),
@@ -198,7 +199,7 @@ def test_constructor_overflow_int64(self):
198199
data = np.zeros((len(data_scores),), dtype=dtype)
199200
data[:] = data_scores
200201
df_crawls = DataFrame(data)
201-
self.assertEqual(df_crawls['uid'].dtype, object)
202+
self.assertEqual(df_crawls['uid'].dtype, np.uint64)
202203

203204
def test_constructor_ordereddict(self):
204205
import random

pandas/tests/frame/test_operators.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -378,10 +378,10 @@ def test_arith_flex_frame(self):
378378
result = getattr(self.mixed_int, op)(2 + self.mixed_int)
379379
exp = f(self.mixed_int, 2 + self.mixed_int)
380380

381-
# overflow in the uint
381+
# no overflow in the uint
382382
dtype = None
383383
if op in ['sub']:
384-
dtype = dict(B='object', C=None)
384+
dtype = dict(B='uint64', C=None)
385385
elif op in ['add', 'mul']:
386386
dtype = dict(C=None)
387387
assert_frame_equal(result, exp)
@@ -410,10 +410,10 @@ def test_arith_flex_frame(self):
410410
2 + self.mixed_int)
411411
exp = f(self.mixed_int, 2 + self.mixed_int)
412412

413-
# overflow in the uint
413+
# no overflow in the uint
414414
dtype = None
415415
if op in ['sub']:
416-
dtype = dict(B='object', C=None)
416+
dtype = dict(B='uint64', C=None)
417417
elif op in ['add', 'mul']:
418418
dtype = dict(C=None)
419419
assert_frame_equal(result, exp)

0 commit comments

Comments
 (0)