Skip to content

Commit d234d84

Browse files
Sheppard, KevinSheppard, Kevin
Sheppard, Kevin
authored and
Sheppard, Kevin
committed
FIX: Add boxing for to_numeric and add tests
Added support for unboxing/boxing to to_numeric. Added tests for to_numeric.
1 parent 7e18d1a commit d234d84

File tree

6 files changed

+131
-61
lines changed

6 files changed

+131
-61
lines changed

doc/source/whatsnew/v0.17.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -927,7 +927,7 @@ Deprecations
927927
- ``Series.is_time_series`` deprecated in favor of ``Series.index.is_all_dates`` (:issue:`11135`)
928928
- Legacy offsets (like ``'A@JAN'``) listed :ref:`here <timeseries.legacyaliases>` are deprecated (note that these have been aliases since 0.8.0), (:issue:`10878`)
929929
- ``WidePanel`` deprecated in favor of ``Panel``, ``LongPanel`` in favor of ``DataFrame`` (note these have been aliases since < 0.11.0), (:issue:`10892`)
930-
- ``DataFrame.convert_objects`` has been deprecated in favor of type-specific function ``pd.to_datetime``, ``pd.to_timestamp`` and ``pd.to_numeric``.
930+
- ``DataFrame.convert_objects`` has been deprecated in favor of the type-specific functions ``pd.to_datetime``, ``pd.to_timedelta`` and ``pd.to_numeric`` (:issue:`11133`).
931931

932932
.. _whatsnew_0170.prior_deprecations:
933933

pandas/core/convert.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
106106
elif timedelta:
107107
return pd.to_timedelta(values, errors='coerce', box=False)
108108
elif numeric:
109-
return lib.maybe_convert_numeric(values, set(), coerce_numeric=True)
109+
return pd.to_numeric(values, errors='coerce')
110110

111111
# Soft conversions
112112
if datetime:

pandas/tests/test_series.py

+40-24
Original file line numberDiff line numberDiff line change
@@ -6452,35 +6452,41 @@ def test_apply_dont_convert_dtype(self):
64526452
def test_convert_objects(self):
64536453

64546454
s = Series([1., 2, 3], index=['a', 'b', 'c'])
6455-
result = s.convert_objects(convert_dates=False, convert_numeric=True)
6455+
with tm.assert_produces_warning(FutureWarning):
6456+
result = s.convert_objects(convert_dates=False, convert_numeric=True)
64566457
assert_series_equal(result, s)
64576458

64586459
# force numeric conversion
64596460
r = s.copy().astype('O')
64606461
r['a'] = '1'
6461-
result = r.convert_objects(convert_dates=False, convert_numeric=True)
6462+
with tm.assert_produces_warning(FutureWarning):
6463+
result = r.convert_objects(convert_dates=False, convert_numeric=True)
64626464
assert_series_equal(result, s)
64636465

64646466
r = s.copy().astype('O')
64656467
r['a'] = '1.'
6466-
result = r.convert_objects(convert_dates=False, convert_numeric=True)
6468+
with tm.assert_produces_warning(FutureWarning):
6469+
result = r.convert_objects(convert_dates=False, convert_numeric=True)
64676470
assert_series_equal(result, s)
64686471

64696472
r = s.copy().astype('O')
64706473
r['a'] = 'garbled'
64716474
expected = s.copy()
64726475
expected['a'] = np.nan
6473-
result = r.convert_objects(convert_dates=False, convert_numeric=True)
6476+
with tm.assert_produces_warning(FutureWarning):
6477+
result = r.convert_objects(convert_dates=False, convert_numeric=True)
64746478
assert_series_equal(result, expected)
64756479

64766480
# GH 4119, not converting a mixed type (e.g. floats and object)
64776481
s = Series([1, 'na', 3, 4])
6478-
result = s.convert_objects(convert_numeric=True)
6482+
with tm.assert_produces_warning(FutureWarning):
6483+
result = s.convert_objects(convert_numeric=True)
64796484
expected = Series([1, np.nan, 3, 4])
64806485
assert_series_equal(result, expected)
64816486

64826487
s = Series([1, '', 3, 4])
6483-
result = s.convert_objects(convert_numeric=True)
6488+
with tm.assert_produces_warning(FutureWarning):
6489+
result = s.convert_objects(convert_numeric=True)
64846490
expected = Series([1, np.nan, 3, 4])
64856491
assert_series_equal(result, expected)
64866492

@@ -6489,39 +6495,45 @@ def test_convert_objects(self):
64896495
[datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), datetime(2001, 1, 3, 0, 0)])
64906496
s2 = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 2, 0, 0), datetime(
64916497
2001, 1, 3, 0, 0), 'foo', 1.0, 1, Timestamp('20010104'), '20010105'], dtype='O')
6492-
6493-
result = s.convert_objects(convert_dates=True, convert_numeric=False)
6498+
with tm.assert_produces_warning(FutureWarning):
6499+
result = s.convert_objects(convert_dates=True, convert_numeric=False)
64946500
expected = Series(
64956501
[Timestamp('20010101'), Timestamp('20010102'), Timestamp('20010103')], dtype='M8[ns]')
64966502
assert_series_equal(result, expected)
64976503

6498-
result = s.convert_objects(
6499-
convert_dates='coerce', convert_numeric=False)
6500-
result = s.convert_objects(
6501-
convert_dates='coerce', convert_numeric=True)
6504+
with tm.assert_produces_warning(FutureWarning):
6505+
result = s.convert_objects(convert_dates='coerce',
6506+
convert_numeric=False)
6507+
with tm.assert_produces_warning(FutureWarning):
6508+
result = s.convert_objects(convert_dates='coerce',
6509+
convert_numeric=True)
65026510
assert_series_equal(result, expected)
65036511

65046512
expected = Series(
65056513
[Timestamp(
65066514
'20010101'), Timestamp('20010102'), Timestamp('20010103'),
65076515
lib.NaT, lib.NaT, lib.NaT, Timestamp('20010104'), Timestamp('20010105')], dtype='M8[ns]')
6508-
result = s2.convert_objects(
6509-
convert_dates='coerce', convert_numeric=False)
6516+
with tm.assert_produces_warning(FutureWarning):
6517+
result = s2.convert_objects(convert_dates='coerce',
6518+
convert_numeric=False)
65106519
assert_series_equal(result, expected)
6511-
result = s2.convert_objects(
6512-
convert_dates='coerce', convert_numeric=True)
6520+
with tm.assert_produces_warning(FutureWarning):
6521+
result = s2.convert_objects(convert_dates='coerce',
6522+
convert_numeric=True)
65136523
assert_series_equal(result, expected)
65146524

65156525
# preserve all-nans (if convert_dates='coerce')
65166526
s = Series(['foo', 'bar', 1, 1.0], dtype='O')
6517-
result = s.convert_objects(
6518-
convert_dates='coerce', convert_numeric=False)
6527+
with tm.assert_produces_warning(FutureWarning):
6528+
result = s.convert_objects(convert_dates='coerce',
6529+
convert_numeric=False)
65196530
assert_series_equal(result, s)
65206531

65216532
# preserve if non-object
65226533
s = Series([1], dtype='float32')
6523-
result = s.convert_objects(
6524-
convert_dates='coerce', convert_numeric=False)
6534+
with tm.assert_produces_warning(FutureWarning):
6535+
result = s.convert_objects(convert_dates='coerce',
6536+
convert_numeric=False)
65256537
assert_series_equal(result, s)
65266538

65276539
#r = s.copy()
@@ -6532,21 +6544,25 @@ def test_convert_objects(self):
65326544
# dateutil parses some single letters into today's value as a date
65336545
for x in 'abcdefghijklmnopqrstuvwxyz':
65346546
s = Series([x])
6535-
result = s.convert_objects(convert_dates='coerce')
6547+
with tm.assert_produces_warning(FutureWarning):
6548+
result = s.convert_objects(convert_dates='coerce')
65366549
assert_series_equal(result, s)
65376550
s = Series([x.upper()])
6538-
result = s.convert_objects(convert_dates='coerce')
6551+
with tm.assert_produces_warning(FutureWarning):
6552+
result = s.convert_objects(convert_dates='coerce')
65396553
assert_series_equal(result, s)
65406554

65416555
def test_convert_objects_preserve_bool(self):
65426556
s = Series([1, True, 3, 5], dtype=object)
6543-
r = s.convert_objects(convert_numeric=True)
6557+
with tm.assert_produces_warning(FutureWarning):
6558+
r = s.convert_objects(convert_numeric=True)
65446559
e = Series([1, 1, 3, 5], dtype='i8')
65456560
tm.assert_series_equal(r, e)
65466561

65476562
def test_convert_objects_preserve_all_bool(self):
65486563
s = Series([False, True, False, False], dtype=object)
6549-
r = s.convert_objects(convert_numeric=True)
6564+
with tm.assert_produces_warning(FutureWarning):
6565+
r = s.convert_objects(convert_numeric=True)
65506566
e = Series([False, True, False, False], dtype=bool)
65516567
tm.assert_series_equal(r, e)
65526568

pandas/tests/test_util.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
# -*- coding: utf-8 -*-
2-
import warnings
3-
42
import nose
53

6-
import sys
7-
import pandas.util
84
from pandas.util.decorators import deprecate_kwarg
95
import pandas.util.testing as tm
106

7+
8+
119
class TestDecorators(tm.TestCase):
1210
def setUp(self):
1311
@deprecate_kwarg('old', 'new')
@@ -75,7 +73,6 @@ def test_rands_array():
7573
assert(arr.shape == (10, 10))
7674
assert(len(arr[1, 1]) == 7)
7775

78-
7976
if __name__ == '__main__':
8077
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
8178
exit=False)

pandas/tools/tests/test_util.py

+51-2
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,15 @@
22
import locale
33
import codecs
44
import nose
5+
from nose.tools import assert_raises, assert_true
56

67
import numpy as np
78
from numpy.testing import assert_equal
89

10+
import pandas as pd
911
from pandas import date_range, Index
1012
import pandas.util.testing as tm
11-
from pandas.tools.util import cartesian_product
12-
13+
from pandas.tools.util import cartesian_product, to_numeric
1314

1415
CURRENT_LOCALE = locale.getlocale()
1516
LOCALE_OVERRIDE = os.environ.get('LOCALE_OVERRIDE', None)
@@ -89,6 +90,54 @@ def test_set_locale(self):
8990
self.assertEqual(current_locale, CURRENT_LOCALE)
9091

9192

93+
class TestToNumeric(tm.TestCase):
94+
def test_series(self):
95+
s = pd.Series(['1', '-3.14', '7'])
96+
res = to_numeric(s)
97+
expected = pd.Series([1, -3.14, 7])
98+
tm.assert_series_equal(res, expected)
99+
100+
s = pd.Series(['1', '-3.14', 7])
101+
res = to_numeric(s)
102+
tm.assert_series_equal(res, expected)
103+
104+
def test_error(self):
105+
s = pd.Series([1, -3.14, 'apple'])
106+
assert_raises(ValueError, to_numeric, s, errors='raise')
107+
108+
res = to_numeric(s, errors='ignore')
109+
expected = pd.Series([1, -3.14, 'apple'])
110+
tm.assert_series_equal(res, expected)
111+
112+
res = to_numeric(s, errors='coerce')
113+
expected = pd.Series([1, -3.14, np.nan])
114+
tm.assert_series_equal(res, expected)
115+
116+
117+
def test_list(self):
118+
s = ['1', '-3.14', '7']
119+
res = to_numeric(s)
120+
expected = np.array([1, -3.14, 7])
121+
tm.assert_numpy_array_equal(res, expected)
122+
123+
def test_numeric(self):
124+
s = pd.Series([1, -3.14, 7], dtype='O')
125+
res = to_numeric(s)
126+
expected = pd.Series([1, -3.14, 7])
127+
tm.assert_series_equal(res, expected)
128+
129+
s = pd.Series([1, -3.14, 7])
130+
res = to_numeric(s)
131+
tm.assert_series_equal(res, expected)
132+
133+
def test_all_nan(self):
134+
s = pd.Series(['a','b','c'])
135+
res = to_numeric(s, errors='coerce')
136+
expected = pd.Series([np.nan, np.nan, np.nan])
137+
tm.assert_series_equal(res, expected)
138+
139+
92140
if __name__ == '__main__':
93141
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
94142
exit=False)
143+

pandas/tools/util.py

+36-28
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
import operator
2-
import warnings
1+
import numpy as np
2+
import pandas.lib as lib
3+
4+
import pandas as pd
35
from pandas.compat import reduce
46
from pandas.core.index import Index
5-
import numpy as np
6-
from pandas import algos
77
from pandas.core import common as com
88

99

@@ -50,47 +50,55 @@ def compose(*funcs):
5050
return reduce(_compose2, funcs)
5151

5252

53-
def to_numeric(arg, errors='raise', box=True, coerce=None):
53+
def to_numeric(arg, errors='raise'):
5454
"""
5555
Convert argument to a numeric type.
5656
5757
Parameters
5858
----------
59-
arg : string, datetime, array of strings (with possible NAs)
59+
arg : list, tuple or array of objects, or Series
6060
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
6161
- If 'raise', then invalid parsing will raise an exception
62-
- If 'coerce', then invalid parsing will be set as NaT
62+
- If 'coerce', then invalid parsing will be set as NaN
6363
- If 'ignore', then invalid parsing will return the input
64-
box : boolean, default True
65-
- If True returns a Series
66-
- If False returns ndarray of values.
6764
6865
Returns
6966
-------
7067
ret : numeric if parsing succeeded.
71-
Return type depends on box
72-
68+
Return type depends on input. Series if Series, otherwise ndarray
7369
7470
Examples
7571
--------
76-
Take separate series and convert to datetime
72+
Take separate series and convert to numeric, coercing when told to
7773
7874
>>> import pandas as pd
79-
>>> df = pd.DataFrame(['1.0', '2', -3])
80-
>>> pd.to_numeric(df)
81-
>>> df = pd.DataFrame(['apple', '1.0', '2', -3])
82-
>>> pd.to_numeric(df, errors='ignore')
83-
>>> pd.to_numeric(df, errors='coerce')
75+
>>> s = pd.Series(['1.0', '2', -3])
76+
>>> pd.to_numeric(s)
77+
>>> s = pd.Series(['apple', '1.0', '2', -3])
78+
>>> pd.to_numeric(s, errors='ignore')
79+
>>> pd.to_numeric(s, errors='coerce')
8480
"""
85-
#TODO: Fix examples
81+
82+
index = name = None
83+
if isinstance(arg, pd.Series):
84+
index, name = arg.index, arg.name
85+
elif isinstance(arg, (list, tuple)):
86+
arg = np.array(arg, dtype='O')
87+
88+
conv = arg
89+
arg = com._ensure_object(arg)
8690

8791
coerce_numeric = False if errors in ('ignore', 'raise') else True
88-
if errors == 'ignore':
89-
try:
90-
values = lib.maybe_convert_numeric(arg,
91-
set(),
92-
coerce_numeric=coerce_numeric)
93-
return values
94-
except:
95-
return arg
96-
return lib.maybe_convert_numeric(arg, set(), coerce_numeric=coerce_numeric)
92+
93+
try:
94+
conv = lib.maybe_convert_numeric(arg,
95+
set(),
96+
coerce_numeric=coerce_numeric)
97+
except:
98+
if errors == 'raise':
99+
raise
100+
101+
if index is not None:
102+
return pd.Series(conv, index=index, name=name)
103+
else:
104+
return conv

0 commit comments

Comments
 (0)