Skip to content

Commit af85719

Browse files
committed
Merge pull request #4120 from jreback/convert_objects
BUG: GH4119 Fixed bug in convert_objects(convert_numeric=True) where a mixed numeric and object Series/Frame was not converting
2 parents e529ee8 + 269e5a3 commit af85719

File tree

5 files changed

+33
-12
lines changed

5 files changed

+33
-12
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,8 @@ pandas 0.12
316316
- Better error messages on inserting incompatible columns to a frame (:issue:`4107`)
317317
- Fixed bug in ``DataFrame.replace`` where a nested dict wasn't being
318318
iterated over when regex=False (:issue:`4115`)
319+
- Fixed bug in ``convert_objects(convert_numeric=True)`` where a mixed numeric and
320+
object Series/Frame was not converting properly (:issue:`4119`)
319321

320322

321323
pandas 0.11.0

pandas/core/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -928,7 +928,7 @@ def astype(self, dtype):
928928
return self._constructor(values, index=self.index, name=self.name,
929929
dtype=values.dtype)
930930

931-
def convert_objects(self, convert_dates=True, convert_numeric=True, copy=True):
931+
def convert_objects(self, convert_dates=True, convert_numeric=False, copy=True):
932932
"""
933933
Attempt to infer better dtype
934934

pandas/src/inference.pyx

+5-2
Original file line numberDiff line numberDiff line change
@@ -379,11 +379,14 @@ def maybe_convert_numeric(ndarray[object] values, set na_values,
379379
elif util.is_float_object(val):
380380
floats[i] = complexes[i] = val
381381
seen_float = 1
382+
elif util.is_integer_object(val):
383+
floats[i] = ints[i] = val
384+
seen_int = 1
382385
elif val is None:
383386
floats[i] = complexes[i] = nan
384387
seen_float = 1
385-
elif len(val) == 0:
386-
if convert_empty:
388+
elif hasattr(val,'__len__') and len(val) == 0:
389+
if convert_empty or coerce_numeric:
387390
floats[i] = complexes[i] = nan
388391
seen_float = 1
389392
else:

pandas/tests/test_frame.py

+6
Original file line numberDiff line numberDiff line change
@@ -5476,6 +5476,12 @@ def test_convert_objects(self):
54765476
converted = self.mixed_frame.copy()
54775477
self.assertRaises(Exception, converted['H'].astype, 'int32')
54785478

5479+
# mixed in a single column
5480+
df = DataFrame(dict(s = Series([1, 'na', 3 ,4])))
5481+
result = df.convert_objects(convert_numeric=True)
5482+
expected = DataFrame(dict(s = Series([1, np.nan, 3 ,4])))
5483+
assert_frame_equal(result, expected)
5484+
54795485
def test_convert_objects_no_conversion(self):
54805486
mixed1 = DataFrame(
54815487
{'a': [1, 2, 3], 'b': [4.0, 5, 6], 'c': ['x', 'y', 'z']})

pandas/tests/test_series.py

+19-9
Original file line numberDiff line numberDiff line change
@@ -3430,44 +3430,54 @@ def test_convert_objects(self):
34303430

34313431
s = Series([1., 2, 3],index=['a','b','c'])
34323432
result = s.convert_objects(convert_dates=False,convert_numeric=True)
3433-
assert_series_equal(s,result)
3433+
assert_series_equal(result, s)
34343434

34353435
# force numeric conversion
34363436
r = s.copy().astype('O')
34373437
r['a'] = '1'
34383438
result = r.convert_objects(convert_dates=False,convert_numeric=True)
3439-
assert_series_equal(s,result)
3439+
assert_series_equal(result, s)
34403440

34413441
r = s.copy().astype('O')
34423442
r['a'] = '1.'
34433443
result = r.convert_objects(convert_dates=False,convert_numeric=True)
3444-
assert_series_equal(s,result)
3444+
assert_series_equal(result, s)
34453445

34463446
r = s.copy().astype('O')
34473447
r['a'] = 'garbled'
34483448
expected = s.copy()
34493449
expected['a'] = np.nan
34503450
result = r.convert_objects(convert_dates=False,convert_numeric=True)
3451-
assert_series_equal(expected,result)
3451+
assert_series_equal(result, expected)
3452+
3453+
# GH 4119, not converting a mixed type (e.g. floats and object)
3454+
s = Series([1, 'na', 3 ,4])
3455+
result = s.convert_objects(convert_numeric=True)
3456+
expected = Series([1,np.nan,3,4])
3457+
assert_series_equal(result, expected)
3458+
3459+
s = Series([1, '', 3 ,4])
3460+
result = s.convert_objects(convert_numeric=True)
3461+
expected = Series([1,np.nan,3,4])
3462+
assert_series_equal(result, expected)
34523463

34533464
# dates
34543465
s = Series([datetime(2001,1,1,0,0), datetime(2001,1,2,0,0), datetime(2001,1,3,0,0) ])
34553466
s2 = Series([datetime(2001,1,1,0,0), datetime(2001,1,2,0,0), datetime(2001,1,3,0,0), 'foo', 1.0, 1, Timestamp('20010104'), '20010105'],dtype='O')
34563467

34573468
result = s.convert_objects(convert_dates=True,convert_numeric=False)
34583469
expected = Series([Timestamp('20010101'),Timestamp('20010102'),Timestamp('20010103')],dtype='M8[ns]')
3459-
assert_series_equal(expected,result)
3470+
assert_series_equal(result, expected)
34603471

34613472
result = s.convert_objects(convert_dates='coerce',convert_numeric=False)
3462-
assert_series_equal(expected,result)
34633473
result = s.convert_objects(convert_dates='coerce',convert_numeric=True)
3464-
assert_series_equal(expected,result)
3474+
assert_series_equal(result, expected)
34653475

34663476
expected = Series([Timestamp('20010101'),Timestamp('20010102'),Timestamp('20010103'),lib.NaT,lib.NaT,lib.NaT,Timestamp('20010104'),Timestamp('20010105')],dtype='M8[ns]')
34673477
result = s2.convert_objects(convert_dates='coerce',convert_numeric=False)
3468-
assert_series_equal(expected,result)
3478+
assert_series_equal(result, expected)
34693479
result = s2.convert_objects(convert_dates='coerce',convert_numeric=True)
3470-
assert_series_equal(expected,result)
3480+
assert_series_equal(result, expected)
34713481

34723482
# preserve all-nans (if convert_dates='coerce')
34733483
s = Series(['foo','bar',1,1.0],dtype='O')

0 commit comments

Comments
 (0)