From ed325cdf184d4b4e28b43aeae3aeb7574a66c571 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 19 Dec 2016 01:40:39 -0500 Subject: [PATCH] BUG: Convert uint64 in maybe_convert_objects Adds handling for uint64 objects during conversion. When negative numbers and uint64 are detected, we then convert the result to object. Picks up where gh-4845 left off. Closes gh-4471. --- doc/source/whatsnew/v0.20.0.txt | 1 + pandas/src/inference.pyx | 32 ++++++++++++++++++---- pandas/tests/frame/test_block_internals.py | 2 +- pandas/tests/types/test_inference.py | 14 ++++++++++ 4 files changed, 43 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 83a70aa34fccf..0dbd3ac6a2ba7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -259,3 +259,4 @@ Bug Fixes - Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`) +- Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`) diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 5ac2c70bb1808..3fe8092c0041c 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -12,6 +12,8 @@ from util cimport (UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX, # core.common import for fast inference checks +npy_int64_max = np.iinfo(np.int64).max + def is_float(object obj): return util.is_float_object(obj) @@ -722,6 +724,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, ndarray[float64_t] floats ndarray[complex128_t] complexes ndarray[int64_t] ints + ndarray[uint64_t] uints ndarray[uint8_t] bools ndarray[int64_t] idatetimes ndarray[int64_t] itimedeltas @@ -731,6 +734,8 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, bint seen_datetimetz = 0 bint seen_timedelta = 0 bint seen_int = 0 + bint seen_uint = 0 + bint seen_sint = 0 bint seen_bool = 0 bint seen_object = 0 bint seen_null = 0 @@ -743,6 +748,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, floats = np.empty(n, dtype='f8') complexes = np.empty(n, dtype='c16') ints = np.empty(n, dtype='i8') + uints = np.empty(n, dtype='u8') bools = np.empty(n, dtype=np.uint8) if convert_datetime: @@ -798,11 +804,21 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, floats[i] = val complexes[i] = val if not seen_null: - try: - ints[i] = val - except OverflowError: + seen_uint = seen_uint or (val > npy_int64_max) + seen_sint = seen_sint or (val < 0) + + if seen_uint and seen_sint: seen_object = 1 break + + if seen_uint: + uints[i] = val + elif seen_sint: + ints[i] = val + else: + uints[i] = val + ints[i] = val + elif util.is_complex_object(val): complexes[i] = val seen_complex = 1 @@ -865,7 +881,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, elif seen_float: return floats elif seen_int: - return ints + if seen_uint: + return uints + else: + return ints elif (not seen_datetime and not seen_numeric and not seen_timedelta): return bools.view(np.bool_) @@ -896,7 +915,10 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if not seen_int: return floats elif seen_int: - return ints + if seen_uint: + return uints + else: + return ints elif (not seen_datetime and not seen_numeric and not seen_timedelta): return bools.view(np.bool_) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index e51cc0f5a6ec7..706820b06b12e 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -142,7 +142,7 @@ def test_constructor_with_convert(self): df = DataFrame({'A': [2 ** 63]}) result = df['A'] - expected = Series(np.asarray([2 ** 63], np.object_), name='A') + expected = Series(np.asarray([2 ** 63], np.uint64), name='A') assert_series_equal(result, expected) df = DataFrame({'A': [datetime(2005, 1, 1), True]}) diff --git a/pandas/tests/types/test_inference.py b/pandas/tests/types/test_inference.py index a63ae5f7cf74e..f83ad51c2f648 100644 --- a/pandas/tests/types/test_inference.py +++ b/pandas/tests/types/test_inference.py @@ -254,6 +254,20 @@ def test_convert_non_hashable(self): result = lib.maybe_convert_numeric(arr, set(), False, True) tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan])) + def test_maybe_convert_objects_uint64(self): + # see gh-4471 + arr = np.array([2**63], dtype=object) + exp = np.array([2**63], dtype=np.uint64) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + arr = np.array([2, -1], dtype=object) + exp = np.array([2, -1], dtype=np.int64) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + + arr = np.array([2**63, -1], dtype=object) + exp = np.array([2**63, -1], dtype=object) + tm.assert_numpy_array_equal(lib.maybe_convert_objects(arr), exp) + class TestTypeInference(tm.TestCase): _multiprocess_can_split_ = True