Skip to content

Commit edb04d3

Browse files
committed
BUG: Convert uint64 in maybe_convert_objects
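Adds handling for uint64 values during object conversion in maybe_convert_objects. Integers larger than the int64 maximum are now converted to a uint64 array instead of being left as objects. When both a negative number and a value that only fits in uint64 are detected in the same array, no single 64-bit integer dtype can hold them, so the result is returned as object dtype. Picks up where gh-4845 left off. Closes gh-4471.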
1 parent f11501a commit edb04d3

File tree

4 files changed

+42
-6
lines changed

4 files changed

+42
-6
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -259,3 +259,4 @@ Bug Fixes
259259

260260

261261
- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)
262+
- Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`)

pandas/src/inference.pyx

+26-5
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX,
1212

1313
# core.common import for fast inference checks
1414

15+
npy_int64_max = np.iinfo(np.int64).max
1516

1617
def is_float(object obj):
1718
return util.is_float_object(obj)
@@ -722,6 +723,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
722723
ndarray[float64_t] floats
723724
ndarray[complex128_t] complexes
724725
ndarray[int64_t] ints
726+
ndarray[uint64_t] uints
725727
ndarray[uint8_t] bools
726728
ndarray[int64_t] idatetimes
727729
ndarray[int64_t] itimedeltas
@@ -731,6 +733,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
731733
bint seen_datetimetz = 0
732734
bint seen_timedelta = 0
733735
bint seen_int = 0
736+
bint seen_uint = 0
737+
bint seen_sint = 0
734738
bint seen_bool = 0
735739
bint seen_object = 0
736740
bint seen_null = 0
@@ -743,6 +747,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
743747
floats = np.empty(n, dtype='f8')
744748
complexes = np.empty(n, dtype='c16')
745749
ints = np.empty(n, dtype='i8')
750+
uints = np.empty(n, dtype='u8')
746751
bools = np.empty(n, dtype=np.uint8)
747752

748753
if convert_datetime:
@@ -798,11 +803,21 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
798803
floats[i] = <float64_t> val
799804
complexes[i] = <double complex> val
800805
if not seen_null:
801-
try:
802-
ints[i] = val
803-
except OverflowError:
806+
seen_uint = seen_uint or (val > npy_int64_max)
807+
seen_sint = seen_sint or (val < 0)
808+
809+
if seen_uint and seen_sint:
804810
seen_object = 1
805811
break
812+
813+
if seen_uint:
814+
uints[i] = val
815+
elif seen_sint:
816+
ints[i] = val
817+
else:
818+
uints[i] = val
819+
ints[i] = val
820+
806821
elif util.is_complex_object(val):
807822
complexes[i] = val
808823
seen_complex = 1
@@ -865,7 +880,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
865880
elif seen_float:
866881
return floats
867882
elif seen_int:
868-
return ints
883+
if seen_uint:
884+
return uints
885+
else:
886+
return ints
869887
elif (not seen_datetime and not seen_numeric
870888
and not seen_timedelta):
871889
return bools.view(np.bool_)
@@ -896,7 +914,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
896914
if not seen_int:
897915
return floats
898916
elif seen_int:
899-
return ints
917+
if seen_uint:
918+
return uints
919+
else:
920+
return ints
900921
elif (not seen_datetime and not seen_numeric
901922
and not seen_timedelta):
902923
return bools.view(np.bool_)

pandas/tests/frame/test_block_internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def test_constructor_with_convert(self):
142142

143143
df = DataFrame({'A': [2 ** 63]})
144144
result = df['A']
145-
expected = Series(np.asarray([2 ** 63], np.object_), name='A')
145+
expected = Series(np.asarray([2 ** 63], np.uint64), name='A')
146146
assert_series_equal(result, expected)
147147

148148
df = DataFrame({'A': [datetime(2005, 1, 1), True]})

pandas/tests/types/test_inference.py

+14
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,20 @@ def test_convert_non_hashable(self):
254254
result = lib.maybe_convert_numeric(arr, set(), False, True)
255255
tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))
256256

257+
def test_maybe_convert_objects_uint64(self):
258+
# see gh-4471
259+
arr = np.array([2**63], dtype=object)
260+
exp = np.array([2**63], dtype=np.uint64)
261+
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
262+
263+
arr = np.array([2, -1], dtype=object)
264+
exp = np.array([2, -1], dtype=np.int64)
265+
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
266+
267+
arr = np.array([2**63, -1], dtype=object)
268+
exp = np.array([2**63, -1], dtype=object)
269+
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
270+
257271

258272
class TestTypeInference(tm.TestCase):
259273
_multiprocess_can_split_ = True

0 commit comments

Comments
 (0)