From ed325cdf184d4b4e28b43aeae3aeb7574a66c571 Mon Sep 17 00:00:00 2001
From: gfyoung <gfyoung17@gmail.com>
Date: Mon, 19 Dec 2016 01:40:39 -0500
Subject: [PATCH] BUG: Convert uint64 in maybe_convert_objects

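Adds handling for integers larger than the int64 maximum when
converting object arrays: such values are now stored as uint64.
When both negative numbers and uint64-range values are detected
in the same array, the result is kept as object dtype.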

Picks up where gh-4845 left off. Closes gh-4471.
---
 doc/source/whatsnew/v0.20.0.txt            |  1 +
 pandas/src/inference.pyx                   | 32 ++++++++++++++++++----
 pandas/tests/frame/test_block_internals.py |  2 +-
 pandas/tests/types/test_inference.py       | 14 ++++++++++
 4 files changed, 43 insertions(+), 6 deletions(-)
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 83a70aa34fccf..0dbd3ac6a2ba7 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -259,3 +259,4 @@ Bug Fixes
 
 
 - Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)
+- Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`)
diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx
index 5ac2c70bb1808..3fe8092c0041c 100644
--- a/pandas/src/inference.pyx
+++ b/pandas/src/inference.pyx
@@ -12,6 +12,8 @@ from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX,
 
 # core.common import for fast inference checks
 
+npy_int64_max = np.iinfo(np.int64).max
+
 
 def is_float(object obj):
     return util.is_float_object(obj)
@@ -722,6 +724,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
         ndarray[float64_t] floats
         ndarray[complex128_t] complexes
         ndarray[int64_t] ints
+        ndarray[uint64_t] uints
         ndarray[uint8_t] bools
         ndarray[int64_t] idatetimes
         ndarray[int64_t] itimedeltas
@@ -731,6 +734,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
         bint seen_datetimetz = 0
         bint seen_timedelta = 0
         bint seen_int = 0
+        bint seen_uint = 0
+        bint seen_sint = 0
         bint seen_bool = 0
         bint seen_object = 0
         bint seen_null = 0
@@ -743,6 +748,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
     floats = np.empty(n, dtype='f8')
     complexes = np.empty(n, dtype='c16')
     ints = np.empty(n, dtype='i8')
+    uints = np.empty(n, dtype='u8')
     bools = np.empty(n, dtype=np.uint8)
 
     if convert_datetime:
@@ -798,11 +804,21 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
             floats[i] = <float64_t> val
             complexes[i] = <double complex> val
             if not seen_null:
-                try:
-                    ints[i] = val
-                except OverflowError:
+                seen_uint = seen_uint or (val > npy_int64_max)
+                seen_sint = seen_sint or (val < 0)
+
+                if seen_uint and seen_sint:
                     seen_object = 1
                     break
+
+                if seen_uint:
+                    uints[i] = val
+                elif seen_sint:
+                    ints[i] = val
+                else:
+                    uints[i] = val
+                    ints[i] = val
+
         elif util.is_complex_object(val):
             complexes[i] = val
             seen_complex = 1
@@ -865,7 +881,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
                         elif seen_float:
                             return floats
                         elif seen_int:
-                            return ints
+                            if seen_uint:
+                                return uints
+                            else:
+                                return ints
                 elif (not seen_datetime and not seen_numeric
                       and not seen_timedelta):
                     return bools.view(np.bool_)
@@ -896,7 +915,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
                             if not seen_int:
                                 return floats
                         elif seen_int:
-                            return ints
+                            if seen_uint:
+                                return uints
+                            else:
+                                return ints
                 elif (not seen_datetime and not seen_numeric
                       and not seen_timedelta):
                     return bools.view(np.bool_)
diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
index e51cc0f5a6ec7..706820b06b12e 100644
--- a/pandas/tests/frame/test_block_internals.py
+++ b/pandas/tests/frame/test_block_internals.py
@@ -142,7 +142,7 @@ def test_constructor_with_convert(self):
 
         df = DataFrame({'A': [2 ** 63]})
         result = df['A']
-        expected = Series(np.asarray([2 ** 63], np.object_), name='A')
+        expected = Series(np.asarray([2 ** 63], np.uint64), name='A')
         assert_series_equal(result, expected)
 
         df = DataFrame({'A': [datetime(2005, 1, 1), True]})
diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py
index a63ae5f7cf74e..f83ad51c2f648 100644
--- a/pandas/tests/types/test_inference.py
+++ b/pandas/tests/types/test_inference.py
@@ -254,6 +254,20 @@ def test_convert_non_hashable(self):
         result = lib.maybe_convert_numeric(arr, set(), False, True)
         tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))
 
+    def test_maybe_convert_objects_uint64(self):
+        # see gh-4471 (uint64 inference; mixed with negatives -> object)
+        arr = np.array([2**63], dtype=object)
+        exp = np.array([2**63], dtype=np.uint64)
+        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
+
+        arr = np.array([2, -1], dtype=object)
+        exp = np.array([2, -1], dtype=np.int64)
+        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
+
+        arr = np.array([2**63, -1], dtype=object)
+        exp = np.array([2**63, -1], dtype=object)
+        tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp)
+
 
 class TestTypeInference(tm.TestCase):
     _multiprocess_can_split_ = True