Skip to content

Commit d44e9c7

Browse files
committed
Merge pull request #2846 from stephenwlin/fix-maybe-convert-objects
BUG: Various issues with maybe_convert_objects (GH #2845) thanks!
2 parents fc8de6d + a3bfa7f commit d44e9c7

File tree

3 files changed

+127
-53
lines changed

3 files changed

+127
-53
lines changed

pandas/src/inference.pyx

+53 -53
Original file line number | Diff line number | Diff line change
@@ -403,6 +403,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
403403
bint seen_bool = 0
404404
bint seen_object = 0
405405
bint seen_null = 0
406+
bint seen_numeric = 0
406407
object val, onan
407408
float64_t fval, fnan
408409

@@ -437,12 +438,17 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
437438
else:
438439
seen_object = 1
439440
# objects[i] = val.astype('O')
441+
break
440442
elif util.is_integer_object(val):
441443
seen_int = 1
442444
floats[i] = <float64_t> val
443445
complexes[i] = <double complex> val
444446
if not seen_null:
445-
ints[i] = val
447+
try:
448+
ints[i] = val
449+
except OverflowError:
450+
seen_object = 1
451+
break
446452
elif util.is_complex_object(val):
447453
complexes[i] = val
448454
seen_complex = 1
@@ -452,6 +458,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
452458
idatetimes[i] = convert_to_tsobject(val, None).value
453459
else:
454460
seen_object = 1
461+
break
455462
elif try_float and not util.is_string_object(val):
456463
# this will convert Decimal objects
457464
try:
@@ -460,72 +467,65 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0,
460467
seen_float = 1
461468
except Exception:
462469
seen_object = 1
470+
break
463471
else:
464472
seen_object = 1
473+
break
465474

466-
if not safe:
467-
if seen_null:
468-
if (seen_float or seen_int) and not seen_object:
469-
if seen_complex:
470-
return complexes
471-
else:
472-
return floats
473-
else:
474-
return objects
475-
else:
476-
if seen_object:
477-
return objects
478-
elif not seen_bool:
479-
if seen_datetime:
480-
if seen_complex or seen_float or seen_int:
481-
return objects
482-
else:
483-
return datetimes
484-
else:
475+
seen_numeric = seen_complex or seen_float or seen_int
476+
477+
if not seen_object:
478+
479+
if not safe:
480+
if seen_null:
481+
if not seen_bool and not seen_datetime:
485482
if seen_complex:
486483
return complexes
487-
elif seen_float:
484+
elif seen_float or seen_int:
488485
return floats
489-
elif seen_int:
490-
return ints
491486
else:
492-
if not seen_float and not seen_int:
487+
if not seen_bool:
488+
if seen_datetime:
489+
if not seen_numeric:
490+
return datetimes
491+
else:
492+
if seen_complex:
493+
return complexes
494+
elif seen_float:
495+
return floats
496+
elif seen_int:
497+
return ints
498+
elif not seen_datetime and not seen_numeric:
493499
return bools.view(np.bool_)
494500

495-
return objects
496-
else:
497-
# don't cast int to float, etc.
498-
if seen_null:
499-
if (seen_float or seen_int) and not seen_object:
500-
if seen_complex:
501-
return complexes
502-
else:
503-
return floats
504-
else:
505-
return objects
506501
else:
507-
if seen_object:
508-
return objects
509-
elif not seen_bool:
510-
if seen_datetime:
511-
if seen_complex or seen_float or seen_int:
512-
return objects
513-
else:
514-
return datetimes
515-
else:
516-
if seen_int and seen_float:
517-
return objects
518-
elif seen_complex:
519-
return complexes
502+
# don't cast int to float, etc.
503+
if seen_null:
504+
if not seen_bool and not seen_datetime:
505+
if seen_complex:
506+
if not seen_int:
507+
return complexes
520508
elif seen_float:
521-
return floats
522-
elif seen_int:
523-
return ints
509+
if not seen_int:
510+
return floats
524511
else:
525-
if not seen_float and not seen_int:
512+
if not seen_bool:
513+
if seen_datetime:
514+
if not seen_numeric:
515+
return datetimes
516+
else:
517+
if seen_complex:
518+
if not seen_int:
519+
return complexes
520+
elif seen_float:
521+
if not seen_int:
522+
return floats
523+
elif seen_int:
524+
return ints
525+
elif not seen_datetime and not seen_numeric:
526526
return bools.view(np.bool_)
527527

528-
return objects
528+
return objects
529529

530530

531531
def convert_sql_column(x):

pandas/tests/test_frame.py

+63
Original file line number | Diff line number | Diff line change
@@ -8099,6 +8099,69 @@ def test_as_matrix_lcd(self):
80998099
values = self.mixed_int.as_matrix(['C'])
81008100
self.assert_(values.dtype == np.uint8)
81018101

8102+
def test_constructor_with_convert(self):
8103+
# this is actually mostly a test of lib.maybe_convert_objects
8104+
# #2845
8105+
df = DataFrame({'A' : [2**63-1] })
8106+
result = df['A']
8107+
expected = Series(np.asarray([2**63-1], np.int64))
8108+
assert_series_equal(result, expected)
8109+
8110+
df = DataFrame({'A' : [2**63] })
8111+
result = df['A']
8112+
expected = Series(np.asarray([2**63], np.object_))
8113+
assert_series_equal(result, expected)
8114+
8115+
df = DataFrame({'A' : [datetime(2005, 1, 1), True] })
8116+
result = df['A']
8117+
expected = Series(np.asarray([datetime(2005, 1, 1), True], np.object_))
8118+
assert_series_equal(result, expected)
8119+
8120+
df = DataFrame({'A' : [None, 1] })
8121+
result = df['A']
8122+
expected = Series(np.asarray([np.nan, 1], np.float_))
8123+
assert_series_equal(result, expected)
8124+
8125+
df = DataFrame({'A' : [1.0, 2] })
8126+
result = df['A']
8127+
expected = Series(np.asarray([1.0, 2], np.float_))
8128+
assert_series_equal(result, expected)
8129+
8130+
df = DataFrame({'A' : [1.0+2.0j, 3] })
8131+
result = df['A']
8132+
expected = Series(np.asarray([1.0+2.0j, 3], np.complex_))
8133+
assert_series_equal(result, expected)
8134+
8135+
df = DataFrame({'A' : [1.0+2.0j, 3.0] })
8136+
result = df['A']
8137+
expected = Series(np.asarray([1.0+2.0j, 3.0], np.complex_))
8138+
assert_series_equal(result, expected)
8139+
8140+
df = DataFrame({'A' : [1.0+2.0j, True] })
8141+
result = df['A']
8142+
expected = Series(np.asarray([1.0+2.0j, True], np.object_))
8143+
assert_series_equal(result, expected)
8144+
8145+
df = DataFrame({'A' : [1.0, None] })
8146+
result = df['A']
8147+
expected = Series(np.asarray([1.0, np.nan], np.float_))
8148+
assert_series_equal(result, expected)
8149+
8150+
df = DataFrame({'A' : [1.0+2.0j, None] })
8151+
result = df['A']
8152+
expected = Series(np.asarray([1.0+2.0j, np.nan], np.complex_))
8153+
assert_series_equal(result, expected)
8154+
8155+
df = DataFrame({'A' : [2.0, 1, True, None] })
8156+
result = df['A']
8157+
expected = Series(np.asarray([2.0, 1, True, None], np.object_))
8158+
assert_series_equal(result, expected)
8159+
8160+
df = DataFrame({'A' : [2.0, 1, datetime(2006, 1, 1), None] })
8161+
result = df['A']
8162+
expected = Series(np.asarray([2.0, 1, datetime(2006, 1, 1),
8163+
None], np.object_))
8164+
assert_series_equal(result, expected)
81028165

81038166
def test_constructor_with_datetimes(self):
81048167
intname = np.dtype(np.int_).name

pandas/tests/test_index.py

+11
Original file line number | Diff line number | Diff line change
@@ -348,6 +348,17 @@ def test_format(self):
348348
expected = [str(index[0])]
349349
self.assertEquals(formatted, expected)
350350

351+
# 2845
352+
index = Index([1, 2.0+3.0j, np.nan])
353+
formatted = index.format()
354+
expected = [str(index[0]), str(index[1]), str(index[2])]
355+
self.assertEquals(formatted, expected)
356+
357+
index = Index([1, 2.0+3.0j, None])
358+
formatted = index.format()
359+
expected = [str(index[0]), str(index[1]), '']
360+
self.assertEquals(formatted, expected)
361+
351362
self.strIndex[:0].format()
352363

353364
def test_format_with_name_time_info(self):

0 commit comments

Comments
 (0)