Skip to content

BUG: Convert uint64 in maybe_convert_objects #14916

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -259,3 +259,4 @@ Bug Fixes


- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)
- Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`)
32 changes: 27 additions & 5 deletions pandas/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX,

# core.common import for fast inference checks

npy_int64_max = np.iinfo(np.int64).max


# Forward ``obj`` to ``util.is_float_object`` and return its result unchanged.
def is_float(object obj):
return util.is_float_object(obj)
Expand Down Expand Up @@ -722,6 +724,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
ndarray[float64_t] floats
ndarray[complex128_t] complexes
ndarray[int64_t] ints
ndarray[uint64_t] uints
ndarray[uint8_t] bools
ndarray[int64_t] idatetimes
ndarray[int64_t] itimedeltas
Expand All @@ -731,6 +734,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
bint seen_datetimetz = 0
bint seen_timedelta = 0
bint seen_int = 0
bint seen_uint = 0
bint seen_sint = 0
bint seen_bool = 0
bint seen_object = 0
bint seen_null = 0
Expand All @@ -743,6 +748,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
floats = np.empty(n, dtype='f8')
complexes = np.empty(n, dtype='c16')
ints = np.empty(n, dtype='i8')
uints = np.empty(n, dtype='u8')
bools = np.empty(n, dtype=np.uint8)

if convert_datetime:
Expand Down Expand Up @@ -798,11 +804,21 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
floats[i] = <float64_t> val
complexes[i] = <double complex> val
if not seen_null:
try:
ints[i] = val
except OverflowError:
seen_uint = seen_uint or (val > npy_int64_max)
seen_sint = seen_sint or (val < 0)

if seen_uint and seen_sint:
seen_object = 1
break

if seen_uint:
uints[i] = val
elif seen_sint:
ints[i] = val
else:
uints[i] = val
ints[i] = val

elif util.is_complex_object(val):
complexes[i] = val
seen_complex = 1
Expand Down Expand Up @@ -865,7 +881,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
elif seen_float:
return floats
elif seen_int:
return ints
if seen_uint:
return uints
else:
return ints
elif (not seen_datetime and not seen_numeric
and not seen_timedelta):
return bools.view(np.bool_)
Expand Down Expand Up @@ -896,7 +915,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
if not seen_int:
return floats
elif seen_int:
return ints
if seen_uint:
return uints
else:
return ints
elif (not seen_datetime and not seen_numeric
and not seen_timedelta):
return bools.view(np.bool_)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_block_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def test_constructor_with_convert(self):

df = DataFrame({'A': [2 ** 63]})
result = df['A']
expected = Series(np.asarray([2 ** 63], np.object_), name='A')
expected = Series(np.asarray([2 ** 63], np.uint64), name='A')
assert_series_equal(result, expected)

df = DataFrame({'A': [datetime(2005, 1, 1), True]})
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/types/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,20 @@ def test_convert_non_hashable(self):
result = lib.maybe_convert_numeric(arr, set(), False, True)
tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))

def test_maybe_convert_objects_uint64(self):
    """Check ``lib.maybe_convert_objects`` around the int64/uint64 boundary.

    Each case builds an object-dtype input from ``values`` and compares the
    conversion result against the same values materialised with the
    expected dtype.
    """
    # see gh-4471
    cases = (
        # a value above the int64 maximum alone -> uint64
        ([2**63], np.uint64),
        # a negative value with small positives -> int64
        ([2, -1], np.int64),
        # a value above the int64 maximum together with a negative -> object
        ([2**63, -1], object),
    )
    for values, expected_dtype in cases:
        converted = lib.maybe_convert_objects(np.array(values, dtype=object))
        expected = np.array(values, dtype=expected_dtype)
        tm.assert_numpy_array_equal(converted, expected)


class TestTypeInference(tm.TestCase):
_multiprocess_can_split_ = True
Expand Down