Skip to content

Commit ed325cd

Browse files
committed
BUG: Convert uint64 in maybe_convert_objects
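Adds handling for uint64 values during object conversion in maybe_convert_objects. Integers larger than the int64 maximum are now converted to a uint64 array instead of being left as object. When the input contains both negative integers and integers larger than the int64 maximum, no single 64-bit integer dtype can hold every value, so the result is kept as object. Picks up where gh-4845 left off. Closes gh-4471.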
1 parent 3ccb501 commit ed325cd

File tree

4 files changed

+43
-6
lines changed

4 files changed

+43
-6
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -259,3 +259,4 @@ Bug Fixes
259259

260260

261261
- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)
262+
- Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`)

pandas/src/inference.pyx

+27-5
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX,
1212

1313
# core.common import for fast inference checks
1414

15+
npy_int64_max = np.iinfo(np.int64).max
16+
1517

1618
def is_float(object obj):
1719
return util.is_float_object(obj)
@@ -722,6 +724,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
722724
ndarray[float64_t] floats
723725
ndarray[complex128_t] complexes
724726
ndarray[int64_t] ints
727+
ndarray[uint64_t] uints
725728
ndarray[uint8_t] bools
726729
ndarray[int64_t] idatetimes
727730
ndarray[int64_t] itimedeltas
@@ -731,6 +734,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
731734
bint seen_datetimetz = 0
732735
bint seen_timedelta = 0
733736
bint seen_int = 0
737+
bint seen_uint = 0
738+
bint seen_sint = 0
734739
bint seen_bool = 0
735740
bint seen_object = 0
736741
bint seen_null = 0
@@ -743,6 +748,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
743748
floats = np.empty(n, dtype='f8')
744749
complexes = np.empty(n, dtype='c16')
745750
ints = np.empty(n, dtype='i8')
751+
uints = np.empty(n, dtype='u8')
746752
bools = np.empty(n, dtype=np.uint8)
747753

748754
if convert_datetime:
@@ -798,11 +804,21 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
798804
floats[i] = <float64_t> val
799805
complexes[i] = <double complex> val
800806
if not seen_null:
801-
try:
802-
ints[i] = val
803-
except OverflowError:
807+
seen_uint = seen_uint or (val > npy_int64_max)
808+
seen_sint = seen_sint or (val < 0)
809+
810+
if seen_uint and seen_sint:
804811
seen_object = 1
805812
break
813+
814+
if seen_uint:
815+
uints[i] = val
816+
elif seen_sint:
817+
ints[i] = val
818+
else:
819+
uints[i] = val
820+
ints[i] = val
821+
806822
elif util.is_complex_object(val):
807823
complexes[i] = val
808824
seen_complex = 1
@@ -865,7 +881,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
865881
elif seen_float:
866882
return floats
867883
elif seen_int:
868-
return ints
884+
if seen_uint:
885+
return uints
886+
else:
887+
return ints
869888
elif (not seen_datetime and not seen_numeric
870889
and not seen_timedelta):
871890
return bools.view(np.bool_)
@@ -896,7 +915,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
896915
if not seen_int:
897916
return floats
898917
elif seen_int:
899-
return ints
918+
if seen_uint:
919+
return uints
920+
else:
921+
return ints
900922
elif (not seen_datetime and not seen_numeric
901923
and not seen_timedelta):
902924
return bools.view(np.bool_)

pandas/tests/frame/test_block_internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def test_constructor_with_convert(self):
142142

143143
df = DataFrame({'A': [2 ** 63]})
144144
result = df['A']
145-
expected = Series(np.asarray([2 ** 63], np.object_), name='A')
145+
expected = Series(np.asarray([2 ** 63], np.uint64), name='A')
146146
assert_series_equal(result, expected)
147147

148148
df = DataFrame({'A': [datetime(2005, 1, 1), True]})

pandas/tests/types/test_inference.py

+14
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,20 @@ def test_convert_non_hashable(self):
254254
result = lib.maybe_convert_numeric(arr, set(), False, True)
255255
tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))
256256

257+
def test_maybe_convert_objects_uint64(self):
258+
# see gh-4471
259+
arr = np.array([2**63], dtype=object)
260+
exp = np.array([2**63], dtype=np.uint64)
261+
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
262+
263+
arr = np.array([2, -1], dtype=object)
264+
exp = np.array([2, -1], dtype=np.int64)
265+
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
266+
267+
arr = np.array([2**63, -1], dtype=object)
268+
exp = np.array([2**63, -1], dtype=object)
269+
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
270+
257271

258272
class TestTypeInference(tm.TestCase):
259273
_multiprocess_can_split_ = True

0 commit comments

Comments
 (0)