pandas-dev · gfyoung · Dec 19, 2016
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -259,3 +259,4 @@ Bug Fixes
 
 
 - Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)
+- Bug in converting object elements of array-like objects to unsigned 64-bit integers: integers above the ``int64`` maximum were left as ``object`` dtype (:issue:`4471`)
diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx
@@ -12,6 +12,8 @@ from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX,
 
 # core.common import for fast inference checks
 
+npy_int64_max = np.iinfo(np.int64).max
+
 
 def is_float(object obj):
     return util.is_float_object(obj)
@@ -722,6 +724,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
         ndarray[float64_t] floats
         ndarray[complex128_t] complexes
         ndarray[int64_t] ints
+        ndarray[uint64_t] uints
         ndarray[uint8_t] bools
         ndarray[int64_t] idatetimes
         ndarray[int64_t] itimedeltas
@@ -731,6 +734,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
         bint seen_datetimetz = 0
         bint seen_timedelta = 0
         bint seen_int = 0
+        bint seen_uint = 0
+        bint seen_sint = 0
         bint seen_bool = 0
         bint seen_object = 0
         bint seen_null = 0
@@ -743,6 +748,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
     floats = np.empty(n, dtype='f8')
     complexes = np.empty(n, dtype='c16')
     ints = np.empty(n, dtype='i8')
+    uints = np.empty(n, dtype='u8')
     bools = np.empty(n, dtype=np.uint8)
 
     if convert_datetime:
@@ -798,11 +804,21 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
             floats[i] = <float64_t> val
             complexes[i] = <double complex> val
             if not seen_null:
-                try:
-                    ints[i] = val
-                except OverflowError:
+                seen_uint = seen_uint or (val > npy_int64_max)
+                seen_sint = seen_sint or (val < 0)
+
+                if seen_uint and seen_sint:
                     seen_object = 1
                     break
+
+                if seen_uint:
+                    uints[i] = val
+                elif seen_sint:
+                    ints[i] = val
+                else:
+                    uints[i] = val
+                    ints[i] = val
+
         elif util.is_complex_object(val):
             complexes[i] = val
             seen_complex = 1
@@ -865,7 +881,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
                         elif seen_float:
                             return floats
                         elif seen_int:
-                            return ints
+                            if seen_uint:
+                                return uints
+                            else:
+                                return ints
                 elif (not seen_datetime and not seen_numeric
                       and not seen_timedelta):
                     return bools.view(np.bool_)
@@ -896,7 +915,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
                             if not seen_int:
                                 return floats
                         elif seen_int:
-                            return ints
+                            if seen_uint:
+                                return uints
+                            else:
+                                return ints
                 elif (not seen_datetime and not seen_numeric
                       and not seen_timedelta):
                     return bools.view(np.bool_)

diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
@@ -142,7 +142,7 @@ def test_constructor_with_convert(self):
 
         df = DataFrame({'A': [2 ** 63]})
         result = df['A']
-        expected = Series(np.asarray([2 ** 63], np.object_), name='A')
+        expected = Series(np.asarray([2 ** 63], np.uint64), name='A')
         assert_series_equal(result, expected)
 
         df = DataFrame({'A': [datetime(2005, 1, 1), True]})

diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py
@@ -254,6 +254,20 @@ def test_convert_non_hashable(self):
         result = lib.maybe_convert_numeric(arr, set(), False, True)
         tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))
 
+    def test_maybe_convert_objects_uint64(self):
+        # see gh-4471: ints above the int64 max must convert to uint64
+        arr = np.array([2**63], dtype=object)  # 2**63 == int64 max + 1
+        exp = np.array([2**63], dtype=np.uint64)
+        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
+
+        arr = np.array([2, -1], dtype=object)  # negative value -> int64
+        exp = np.array([2, -1], dtype=np.int64)
+        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
+
+        arr = np.array([2**63, -1], dtype=object)  # mixed: stays object
+        exp = np.array([2**63, -1], dtype=object)
+        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
+
 
 class TestTypeInference(tm.TestCase):
     _multiprocess_can_split_ = True
Original file line number	Diff line number	Diff line change
Expand Up		@@ -259,3 +259,4 @@ Bug Fixes


		- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)
		- Bug in converting object elements of array-like objects to unsigned 64-bit integers: integers above the ``int64`` maximum were left as ``object`` dtype (:issue:`4471`)