Skip to content

Commit 0c52813

Browse files
gfyoung authored and jreback committed
BUG: Convert uint64 in maybe_convert_objects
Adds handling for `uint64` objects during conversion. When both negative numbers and `uint64` values are detected, the result is converted to `object`. Picks up where #4845 left off. Closes #4471.

Author: gfyoung <[email protected]>

Closes #14916 from gfyoung/convert-objects-uint64 and squashes the following commits:

ed325cd [gfyoung] BUG: Convert uint64 in maybe_convert_objects
1 parent b35c689 commit 0c52813

File tree

4 files changed

+43
-6
lines changed

4 files changed

+43
-6
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -262,3 +262,4 @@ Bug Fixes
262262

263263
- Bug in ``Series.unique()`` in which unsigned 64-bit integers were causing overflow (:issue:`14721`)
264264
- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)
265+
- Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`)

pandas/src/inference.pyx

+27-5
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX,
1313

1414
# core.common import for fast inference checks
1515

16+
npy_int64_max = np.iinfo(np.int64).max
17+
1618

1719
def is_float(object obj):
1820
return util.is_float_object(obj)
@@ -730,6 +732,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
730732
ndarray[float64_t] floats
731733
ndarray[complex128_t] complexes
732734
ndarray[int64_t] ints
735+
ndarray[uint64_t] uints
733736
ndarray[uint8_t] bools
734737
ndarray[int64_t] idatetimes
735738
ndarray[int64_t] itimedeltas
@@ -739,6 +742,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
739742
bint seen_datetimetz = 0
740743
bint seen_timedelta = 0
741744
bint seen_int = 0
745+
bint seen_uint = 0
746+
bint seen_sint = 0
742747
bint seen_bool = 0
743748
bint seen_object = 0
744749
bint seen_null = 0
@@ -751,6 +756,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
751756
floats = np.empty(n, dtype='f8')
752757
complexes = np.empty(n, dtype='c16')
753758
ints = np.empty(n, dtype='i8')
759+
uints = np.empty(n, dtype='u8')
754760
bools = np.empty(n, dtype=np.uint8)
755761

756762
if convert_datetime:
@@ -806,11 +812,21 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
806812
floats[i] = <float64_t> val
807813
complexes[i] = <double complex> val
808814
if not seen_null:
809-
try:
810-
ints[i] = val
811-
except OverflowError:
815+
seen_uint = seen_uint or (val > npy_int64_max)
816+
seen_sint = seen_sint or (val < 0)
817+
818+
if seen_uint and seen_sint:
812819
seen_object = 1
813820
break
821+
822+
if seen_uint:
823+
uints[i] = val
824+
elif seen_sint:
825+
ints[i] = val
826+
else:
827+
uints[i] = val
828+
ints[i] = val
829+
814830
elif util.is_complex_object(val):
815831
complexes[i] = val
816832
seen_complex = 1
@@ -873,7 +889,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
873889
elif seen_float:
874890
return floats
875891
elif seen_int:
876-
return ints
892+
if seen_uint:
893+
return uints
894+
else:
895+
return ints
877896
elif (not seen_datetime and not seen_numeric
878897
and not seen_timedelta):
879898
return bools.view(np.bool_)
@@ -904,7 +923,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
904923
if not seen_int:
905924
return floats
906925
elif seen_int:
907-
return ints
926+
if seen_uint:
927+
return uints
928+
else:
929+
return ints
908930
elif (not seen_datetime and not seen_numeric
909931
and not seen_timedelta):
910932
return bools.view(np.bool_)

pandas/tests/frame/test_block_internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def test_constructor_with_convert(self):
142142

143143
df = DataFrame({'A': [2 ** 63]})
144144
result = df['A']
145-
expected = Series(np.asarray([2 ** 63], np.object_), name='A')
145+
expected = Series(np.asarray([2 ** 63], np.uint64), name='A')
146146
assert_series_equal(result, expected)
147147

148148
df = DataFrame({'A': [datetime(2005, 1, 1), True]})

pandas/tests/types/test_inference.py

+14
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,20 @@ def test_convert_non_hashable(self):
254254
result = lib.maybe_convert_numeric(arr, set(), False, True)
255255
tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))
256256

257+
def test_maybe_convert_objects_uint64(self):
258+
# see gh-4471
259+
arr = np.array([2**63], dtype=object)
260+
exp = np.array([2**63], dtype=np.uint64)
261+
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
262+
263+
arr = np.array([2, -1], dtype=object)
264+
exp = np.array([2, -1], dtype=np.int64)
265+
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
266+
267+
arr = np.array([2**63, -1], dtype=object)
268+
exp = np.array([2**63, -1], dtype=object)
269+
tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
270+
257271

258272
class TestTypeInference(tm.TestCase):
259273
_multiprocess_can_split_ = True

0 commit comments

Comments
 (0)