Skip to content

BUG/DTYPES: preserve bools in convert_objects #7416

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 10, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/v0.14.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,5 @@ Bug Fixes
(:issue:`7408`)
- Bug where ``NaT`` wasn't repr'd correctly in a ``MultiIndex`` (:issue:`7406`,
:issue:`7409`).
- Bug where bool objects were converted to ``nan`` in ``convert_objects``
(:issue:`7416`).
63 changes: 34 additions & 29 deletions pandas/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -427,86 +427,91 @@ cdef extern from "parse_helper.h":
cdef double fINT64_MAX = <double> INT64_MAX
cdef double fINT64_MIN = <double> INT64_MIN

# NOTE(review): this span is a flattened diff view -- lines from before and
# after the change are interleaved (two signatures, duplicated flag
# assignments, two loop headers) and the original indentation is lost.
def maybe_convert_numeric(ndarray[object] values, set na_values,
convert_empty=True, coerce_numeric=False):

def maybe_convert_numeric(object[:] values, set na_values,
bint convert_empty=True, bint coerce_numeric=False):
'''
Type inference function -- convert strings to numeric (potentially) and
convert to proper dtype array.

Each element of ``values`` is classified in turn:

* members of ``na_values`` and ``None`` are stored as NaN;
* float, integer, bool and complex objects are copied into the matching
  typed buffers;
* zero-length objects are stored as NaN when ``convert_empty`` or
  ``coerce_numeric`` is true, otherwise
  ``ValueError('Empty string encountered')`` is raised;
* anything else is parsed with ``floatify``; a failure is re-raised unless
  ``coerce_numeric`` is true, in which case the element is stored as NaN.

Returns the complex128 buffer if a complex object was seen, else the
float64 buffer if anything float-like (including NaN) was seen, else the
int64 buffer if an integer object was seen, else the uint8 buffer viewed as
``np.bool_`` if a bool object was seen, else the int64 buffer.
'''
cdef:
int status
Py_ssize_t i, n
ndarray[float64_t] floats
ndarray[complex128_t] complexes
ndarray[int64_t] ints
bint seen_float = 0
bint seen_complex = 0
Py_ssize_t i, n = values.size
# One output buffer per candidate dtype; np.empty leaves them uninitialised,
# so only slots that are explicitly assigned below hold meaningful data.
ndarray[float64_t] floats = np.empty(n, dtype='f8')
ndarray[complex128_t] complexes = np.empty(n, dtype='c16')
ndarray[int64_t] ints = np.empty(n, dtype='i8')
ndarray[uint8_t] bools = np.empty(n, dtype='u1')
# seen_* flags record which kinds of value occurred; they select the buffer
# that is returned at the end.
bint seen_float = False
bint seen_complex = False
bint seen_int = False
bint seen_bool = False
object val
float64_t fval

n = len(values)

floats = np.empty(n, dtype='f8')
complexes = np.empty(n, dtype='c16')
ints = np.empty(n, dtype='i8')

for i from 0 <= i < n:
for i in range(n):
val = values[i]

# NA sentinels are stored as NaN, which forces at least a float result.
if val in na_values:
floats[i] = complexes[i] = nan
seen_float = 1
seen_float = True
elif util.is_float_object(val):
floats[i] = complexes[i] = val
seen_float = 1
seen_float = True
elif util.is_integer_object(val):
# NOTE(review): complexes[i] is not assigned in this branch; if another
# element sets seen_complex, the returned array keeps an uninitialised
# slot here -- confirm whether that is intended.
floats[i] = ints[i] = val
seen_int = 1
seen_int = True
elif util.is_bool_object(val):
# Bools are written to the float, int and bool buffers so that they survive
# whichever buffer is finally returned (complexes is not written here).
floats[i] = ints[i] = bools[i] = val
seen_bool = True
elif val is None:
floats[i] = complexes[i] = nan
seen_float = 1
elif hasattr(val,'__len__') and len(val) == 0:
seen_float = True
elif hasattr(val, '__len__') and len(val) == 0:
# Zero-length values (e.g. empty strings) are NaN only when the caller
# opted in through convert_empty or coerce_numeric.
if convert_empty or coerce_numeric:
floats[i] = complexes[i] = nan
seen_float = 1
seen_float = True
else:
raise ValueError('Empty string encountered')
elif util.is_complex_object(val):
complexes[i] = val
seen_complex = 1
seen_complex = True
else:
# Fallback: parse the value with floatify, then decide whether it still
# fits an integer result.
try:
status = floatify(val, &fval)
floats[i] = fval
if not seen_float:
if '.' in val or fval == INF or fval == NEGINF:
seen_float = 1
seen_float = True
elif 'inf' in val: # special case to handle +/-inf
seen_float = 1
seen_float = True
elif fval < fINT64_MAX and fval > fINT64_MIN:
# NOTE(review): seen_int is not set when an integer is parsed here, so an
# input mixing numeric strings with bools appears to return the bools
# buffer with this slot unset -- confirm.
try:
ints[i] = int(val)
except ValueError:
ints[i] = <int64_t> fval
else:
seen_float = 1
seen_float = True
# NOTE(review): bare except catches every exception type, including
# KeyboardInterrupt and SystemExit; with coerce_numeric set they are
# swallowed and the element becomes NaN.
except:
if not coerce_numeric:
raise

floats[i] = nan
seen_float = 1

seen_float = True

# Return the widest buffer that was required: complex > float > int > bool.
if seen_complex:
return complexes
elif seen_float:
return floats
else:
elif seen_int:
return ints
elif seen_bool:
return bools.view(np.bool_)
return ints


def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
bint safe=0, bint convert_datetime=0, bint convert_timedelta=0):
bint safe=0, bint convert_datetime=0,
bint convert_timedelta=0):
'''
Type inference function-- convert object array to proper dtype
'''
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5025,6 +5025,18 @@ def test_convert_objects(self):
result = s.convert_objects(convert_dates='coerce')
assert_series_equal(result, s)

def test_convert_objects_preserve_bool(self):
    # A bool mixed in with integers must be converted along with them
    # (True -> 1) rather than being turned into NaN.
    mixed = Series([1, True, 3, 5], dtype=object)
    expected = Series([1, 1, 3, 5], dtype='i8')
    converted = mixed.convert_objects(convert_numeric=True)
    tm.assert_series_equal(converted, expected)

def test_convert_objects_preserve_all_bool(self):
    # An object Series holding only bools must come back with bool dtype.
    flags = [False, True, False, False]
    expected = Series(flags, dtype=bool)
    converted = Series(flags, dtype=object).convert_objects(
        convert_numeric=True)
    tm.assert_series_equal(converted, expected)

def test_apply_args(self):
s = Series(['foo,bar'])

Expand Down