Skip to content

BUG: Various issues with maybe_convert_objects (GH #2845) #2846

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 14, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 53 additions & 53 deletions pandas/src/inference.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
bint seen_bool = 0
bint seen_object = 0
bint seen_null = 0
bint seen_numeric = 0
object val, onan
float64_t fval, fnan

Expand Down Expand Up @@ -437,12 +438,17 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
else:
seen_object = 1
# objects[i] = val.astype('O')
break
elif util.is_integer_object(val):
seen_int = 1
floats[i] = <float64_t> val
complexes[i] = <double complex> val
if not seen_null:
ints[i] = val
try:
ints[i] = val
except OverflowError:
seen_object = 1
break
elif util.is_complex_object(val):
complexes[i] = val
seen_complex = 1
Expand All @@ -452,6 +458,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
idatetimes[i] = convert_to_tsobject(val, None).value
else:
seen_object = 1
break
elif try_float and not util.is_string_object(val):
# this will convert Decimal objects
try:
Expand All @@ -460,72 +467,65 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
seen_float = 1
except Exception:
seen_object = 1
break
else:
seen_object = 1
break

if not safe:
if seen_null:
if (seen_float or seen_int) and not seen_object:
if seen_complex:
return complexes
else:
return floats
else:
return objects
else:
if seen_object:
return objects
elif not seen_bool:
if seen_datetime:
if seen_complex or seen_float or seen_int:
return objects
else:
return datetimes
else:
seen_numeric = seen_complex or seen_float or seen_int

if not seen_object:

if not safe:
if seen_null:
if not seen_bool and not seen_datetime:
if seen_complex:
return complexes
elif seen_float:
elif seen_float or seen_int:
return floats
elif seen_int:
return ints
else:
if not seen_float and not seen_int:
if not seen_bool:
if seen_datetime:
if not seen_numeric:
return datetimes
else:
if seen_complex:
return complexes
elif seen_float:
return floats
elif seen_int:
return ints
elif not seen_datetime and not seen_numeric:
return bools.view(np.bool_)

return objects
else:
# don't cast int to float, etc.
if seen_null:
if (seen_float or seen_int) and not seen_object:
if seen_complex:
return complexes
else:
return floats
else:
return objects
else:
if seen_object:
return objects
elif not seen_bool:
if seen_datetime:
if seen_complex or seen_float or seen_int:
return objects
else:
return datetimes
else:
if seen_int and seen_float:
return objects
elif seen_complex:
return complexes
# don't cast int to float, etc.
if seen_null:
if not seen_bool and not seen_datetime:
if seen_complex:
if not seen_int:
return complexes
elif seen_float:
return floats
elif seen_int:
return ints
if not seen_int:
return floats
else:
if not seen_float and not seen_int:
if not seen_bool:
if seen_datetime:
if not seen_numeric:
return datetimes
else:
if seen_complex:
if not seen_int:
return complexes
elif seen_float:
if not seen_int:
return floats
elif seen_int:
return ints
elif not seen_datetime and not seen_numeric:
return bools.view(np.bool_)

return objects
return objects


def convert_sql_column(x):
Expand Down
63 changes: 63 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8099,6 +8099,69 @@ def test_as_matrix_lcd(self):
values = self.mixed_int.as_matrix(['C'])
self.assert_(values.dtype == np.uint8)

def test_constructor_with_convert(self):
    """Check the dtype inferred for a DataFrame column built from a list.

    This is mostly a test of lib.maybe_convert_objects (GH #2845): each
    case builds a one-column frame from a plain Python list and compares
    the resulting column with a Series of the expected dtype.
    """
    # np.float64 / np.complex128 are spelled out explicitly: the
    # np.float_ and np.complex_ aliases were removed in NumPy 2.0.
    #
    # Each entry is (constructor input, expected values, expected dtype).
    cases = [
        # largest value representable as int64 stays int64
        ([2**63 - 1], [2**63 - 1], np.int64),
        # one past the int64 range falls back to object
        ([2**63], [2**63], np.object_),
        # datetime mixed with bool -> object
        ([datetime(2005, 1, 1), True],
         [datetime(2005, 1, 1), True], np.object_),
        # None next to an int -> float with NaN
        ([None, 1], [np.nan, 1], np.float64),
        # float mixed with int -> float
        ([1.0, 2], [1.0, 2], np.float64),
        # complex mixed with int -> complex
        ([1.0 + 2.0j, 3], [1.0 + 2.0j, 3], np.complex128),
        # complex mixed with float -> complex
        ([1.0 + 2.0j, 3.0], [1.0 + 2.0j, 3.0], np.complex128),
        # complex mixed with bool -> object
        ([1.0 + 2.0j, True], [1.0 + 2.0j, True], np.object_),
        # float with None -> float with NaN
        ([1.0, None], [1.0, np.nan], np.float64),
        # complex with None -> complex with NaN
        ([1.0 + 2.0j, None], [1.0 + 2.0j, np.nan], np.complex128),
        # numeric, bool and None together -> object
        ([2.0, 1, True, None], [2.0, 1, True, None], np.object_),
        # numeric, datetime and None together -> object
        ([2.0, 1, datetime(2006, 1, 1), None],
         [2.0, 1, datetime(2006, 1, 1), None], np.object_),
    ]

    for values, expected_values, dtype in cases:
        result = DataFrame({'A': values})['A']
        expected = Series(np.asarray(expected_values, dtype))
        assert_series_equal(result, expected)

def test_constructor_with_datetimes(self):
intname = np.dtype(np.int_).name
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,17 @@ def test_format(self):
expected = [str(index[0])]
self.assertEquals(formatted, expected)

# 2845
index = Index([1, 2.0+3.0j, np.nan])
formatted = index.format()
expected = [str(index[0]), str(index[1]), str(index[2])]
self.assertEquals(formatted, expected)

index = Index([1, 2.0+3.0j, None])
formatted = index.format()
expected = [str(index[0]), str(index[1]), '']
self.assertEquals(formatted, expected)

self.strIndex[:0].format()

def test_format_with_name_time_info(self):
Expand Down