From 351097b18662bea8cadb7d0baed87d4cc45f2156 Mon Sep 17 00:00:00 2001 From: Phillip Cloud Date: Tue, 10 Jun 2014 10:54:24 -0400 Subject: [PATCH] BUG/DTYPES: preserve bools in convert_objects --- doc/source/v0.14.1.txt | 2 ++ pandas/src/inference.pyx | 63 ++++++++++++++++++++----------------- pandas/tests/test_series.py | 12 +++++++ 3 files changed, 48 insertions(+), 29 deletions(-) diff --git a/doc/source/v0.14.1.txt b/doc/source/v0.14.1.txt index 2b797dc295354..2b76da1434ba3 100644 --- a/doc/source/v0.14.1.txt +++ b/doc/source/v0.14.1.txt @@ -216,3 +216,5 @@ Bug Fixes (:issue:`7408`) - Bug where ``NaT`` wasn't repr'd correctly in a ``MultiIndex`` (:issue:`7406`, :issue:`7409`). +- Bug where bool objects were converted to ``nan`` in ``convert_objects`` + (:issue:`7416`). diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 3aa71ad02ba6a..19c1fc7522961 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -427,86 +427,91 @@ cdef extern from "parse_helper.h": cdef double fINT64_MAX = INT64_MAX cdef double fINT64_MIN = INT64_MIN -def maybe_convert_numeric(ndarray[object] values, set na_values, - convert_empty=True, coerce_numeric=False): + +def maybe_convert_numeric(object[:] values, set na_values, + bint convert_empty=True, bint coerce_numeric=False): ''' Type inference function-- convert strings to numeric (potentially) and convert to proper dtype array ''' cdef: int status - Py_ssize_t i, n - ndarray[float64_t] floats - ndarray[complex128_t] complexes - ndarray[int64_t] ints - bint seen_float = 0 - bint seen_complex = 0 + Py_ssize_t i, n = values.size + ndarray[float64_t] floats = np.empty(n, dtype='f8') + ndarray[complex128_t] complexes = np.empty(n, dtype='c16') + ndarray[int64_t] ints = np.empty(n, dtype='i8') + ndarray[uint8_t] bools = np.empty(n, dtype='u1') + bint seen_float = False + bint seen_complex = False + bint seen_int = False + bint seen_bool = False object val float64_t fval - n = len(values) - - floats = np.empty(n, dtype='f8') - complexes = np.empty(n, dtype='c16') - ints = np.empty(n, dtype='i8') - - for i from 0 <= i < n: + for i in range(n): val = values[i] if val in na_values: floats[i] = complexes[i] = nan - seen_float = 1 + seen_float = True elif util.is_float_object(val): floats[i] = complexes[i] = val - seen_float = 1 + seen_float = True elif util.is_integer_object(val): floats[i] = ints[i] = val - seen_int = 1 + seen_int = True + elif util.is_bool_object(val): + floats[i] = ints[i] = bools[i] = val + seen_bool = True elif val is None: floats[i] = complexes[i] = nan - seen_float = 1 - elif hasattr(val,'__len__') and len(val) == 0: + seen_float = True + elif hasattr(val, '__len__') and len(val) == 0: if convert_empty or coerce_numeric: floats[i] = complexes[i] = nan - seen_float = 1 + seen_float = True else: raise ValueError('Empty string encountered') elif util.is_complex_object(val): complexes[i] = val - seen_complex = 1 + seen_complex = True else: try: status = floatify(val, &fval) floats[i] = fval if not seen_float: if '.' in val or fval == INF or fval == NEGINF: - seen_float = 1 + seen_float = True elif 'inf' in val: # special case to handle +/-inf - seen_float = 1 + seen_float = True elif fval < fINT64_MAX and fval > fINT64_MIN: try: ints[i] = int(val) except ValueError: ints[i] = fval else: - seen_float = 1 + seen_float = True except: if not coerce_numeric: raise floats[i] = nan - seen_float = 1 - + seen_float = True if seen_complex: return complexes elif seen_float: return floats - else: + elif seen_int: return ints + elif seen_bool: + return bools.view(np.bool_) + return ints + def maybe_convert_objects(ndarray[object] objects, bint try_float=0, - bint safe=0, bint convert_datetime=0, bint convert_timedelta=0): + bint safe=0, bint convert_datetime=0, + bint convert_timedelta=0): ''' Type inference function-- convert object array to proper dtype ''' diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 3881ed5277b85..85e451541d39c 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -5025,6 +5025,18 @@ def test_convert_objects(self): result = s.convert_objects(convert_dates='coerce') assert_series_equal(result, s) + def test_convert_objects_preserve_bool(self): + s = Series([1, True, 3, 5], dtype=object) + r = s.convert_objects(convert_numeric=True) + e = Series([1, 1, 3, 5], dtype='i8') + tm.assert_series_equal(r, e) + + def test_convert_objects_preserve_all_bool(self): + s = Series([False, True, False, False], dtype=object) + r = s.convert_objects(convert_numeric=True) + e = Series([False, True, False, False], dtype=bool) + tm.assert_series_equal(r, e) + def test_apply_args(self): s = Series(['foo,bar'])