Skip to content

PERF: to_numeric for numeric dtypes #12777

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion asv_bench/benchmarks/miscellaneous.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,26 @@ def prop(self):
self.obj = Foo()

def time_misc_cache_readonly(self):
self.obj.prop
self.obj.prop


class to_numeric(object):
    """ASV benchmarks for ``pd.to_numeric`` across input kinds."""
    goal_time = 0.2

    def setup(self):
        # String parsing is the slow path, so the purely-string fixture is
        # 100x smaller than the already-numeric one.
        self.n = 10000
        self.float = Series(np.random.randn(100 * self.n))
        self.numstr = self.float.astype('str')
        self.str = Series(tm.makeStringIndex(self.n))

    def time_from_float(self):
        # already-numeric input: should hit the pass-through fast path
        pd.to_numeric(self.float)

    def time_from_numeric_str(self):
        # numeric-looking strings: full parse required
        pd.to_numeric(self.numstr)

    def time_from_str_ignore(self):
        # unparseable strings returned unchanged
        pd.to_numeric(self.str, errors='ignore')

    def time_from_str_coerce(self):
        # unparseable strings coerced to NaN
        pd.to_numeric(self.str, errors='coerce')
5 changes: 5 additions & 0 deletions doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,8 @@ Performance Improvements
- Improved performance of ``DataFrame.to_sql`` when checking case sensitivity for tables. Now only checks if table has been created correctly when table name is not lower case. (:issue:`12876`)
- Improved performance of ``Period`` construction and time series plotting (:issue:`12903`, :issue:`11831`).
- Improved performance of ``.str.encode()`` and ``.str.decode()`` methods (:issue:`13008`)
- Improved performance of ``to_numeric`` when the input is already of numeric dtype (:issue:`12777`)




Expand Down Expand Up @@ -516,3 +518,6 @@ Bug Fixes
- Bug in ``.describe()`` resets categorical columns information (:issue:`11558`)
- Bug where ``loffset`` argument was not applied when calling ``resample().count()`` on a timeseries (:issue:`12725`)
- ``pd.read_excel()`` now accepts column names associated with keyword argument ``names`` (:issue:`12870`)
- Bug in ``to_numeric`` with ``Index`` input returning ``np.ndarray`` rather than ``Index`` (:issue:`12777`)
- Bug in ``to_numeric`` where datetime-like input could raise ``TypeError`` (:issue:`12777`)
- Bug in ``to_numeric`` where scalar input raised ``ValueError`` (:issue:`12777`)
137 changes: 135 additions & 2 deletions pandas/tools/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import locale
import codecs
import nose
from nose.tools import assert_raises

import numpy as np
from numpy.testing import assert_equal
Expand Down Expand Up @@ -102,9 +101,25 @@ def test_series(self):
res = to_numeric(s)
tm.assert_series_equal(res, expected)

def test_series_numeric(self):
s = pd.Series([1, 3, 4, 5], index=list('ABCD'), name='XXX')
res = to_numeric(s)
tm.assert_series_equal(res, s)

s = pd.Series([1., 3., 4., 5.], index=list('ABCD'), name='XXX')
res = to_numeric(s)
tm.assert_series_equal(res, s)

# bool is regarded as numeric
s = pd.Series([True, False, True, True],
index=list('ABCD'), name='XXX')
res = to_numeric(s)
tm.assert_series_equal(res, s)

def test_error(self):
s = pd.Series([1, -3.14, 'apple'])
assert_raises(ValueError, to_numeric, s, errors='raise')
with tm.assertRaises(ValueError):
to_numeric(s, errors='raise')

res = to_numeric(s, errors='ignore')
expected = pd.Series([1, -3.14, 'apple'])
Expand All @@ -114,12 +129,40 @@ def test_error(self):
expected = pd.Series([1, -3.14, np.nan])
tm.assert_series_equal(res, expected)

def test_error_seen_bool(self):
    """Mixed bool/str input: raise, pass through, or coerce per ``errors``."""
    s = pd.Series([True, False, 'apple'])

    # errors='raise': the non-numeric string is fatal
    with tm.assertRaises(ValueError):
        to_numeric(s, errors='raise')

    # errors='ignore': the input comes back untouched
    tm.assert_series_equal(to_numeric(s, errors='ignore'),
                           pd.Series([True, False, 'apple']))

    # errors='coerce': bools become floats, the string becomes NaN
    tm.assert_series_equal(to_numeric(s, errors='coerce'),
                           pd.Series([1., 0., np.nan]))

def test_list(self):
s = ['1', '-3.14', '7']
res = to_numeric(s)
expected = np.array([1, -3.14, 7])
tm.assert_numpy_array_equal(res, expected)

def test_list_numeric(self):
s = [1, 3, 4, 5]
res = to_numeric(s)
tm.assert_numpy_array_equal(res, np.array(s))

s = [1., 3., 4., 5.]
res = to_numeric(s)
tm.assert_numpy_array_equal(res, np.array(s))

# bool is regarded as numeric
s = [True, False, True, True]
res = to_numeric(s)
tm.assert_numpy_array_equal(res, np.array(s))

def test_numeric(self):
s = pd.Series([1, -3.14, 7], dtype='O')
res = to_numeric(s)
Expand All @@ -145,6 +188,96 @@ def test_type_check(self):
with tm.assertRaisesRegexp(TypeError, "1-d array"):
to_numeric(df, errors=errors)

def test_scalar(self):
    """Scalars: numbers pass through, numeric strings are parsed."""
    for value in (1, 1.1):
        self.assertEqual(pd.to_numeric(value), value)

    self.assertEqual(pd.to_numeric('1'), 1)
    self.assertEqual(pd.to_numeric('1.1'), 1.1)

    # a non-numeric scalar: raise / pass through / coerce to NaN
    with tm.assertRaises(ValueError):
        to_numeric('XX', errors='raise')
    self.assertEqual(to_numeric('XX', errors='ignore'), 'XX')
    self.assertTrue(np.isnan(to_numeric('XX', errors='coerce')))

def test_numeric_dtypes(self):
idx = pd.Index([1, 2, 3], name='xxx')
res = pd.to_numeric(idx)
tm.assert_index_equal(res, idx)

res = pd.to_numeric(pd.Series(idx, name='xxx'))
tm.assert_series_equal(res, pd.Series(idx, name='xxx'))

res = pd.to_numeric(idx.values)
tm.assert_numpy_array_equal(res, idx.values)

idx = pd.Index([1., np.nan, 3., np.nan], name='xxx')
res = pd.to_numeric(idx)
tm.assert_index_equal(res, idx)

res = pd.to_numeric(pd.Series(idx, name='xxx'))
tm.assert_series_equal(res, pd.Series(idx, name='xxx'))

res = pd.to_numeric(idx.values)
tm.assert_numpy_array_equal(res, idx.values)

def test_str(self):
idx = pd.Index(['1', '2', '3'], name='xxx')
exp = np.array([1, 2, 3])
res = pd.to_numeric(idx)
tm.assert_index_equal(res, pd.Index(exp, name='xxx'))

res = pd.to_numeric(pd.Series(idx, name='xxx'))
tm.assert_series_equal(res, pd.Series(exp, name='xxx'))

res = pd.to_numeric(idx.values)
tm.assert_numpy_array_equal(res, exp)

idx = pd.Index(['1.5', '2.7', '3.4'], name='xxx')
exp = np.array([1.5, 2.7, 3.4])
res = pd.to_numeric(idx)
tm.assert_index_equal(res, pd.Index(exp, name='xxx'))

res = pd.to_numeric(pd.Series(idx, name='xxx'))
tm.assert_series_equal(res, pd.Series(exp, name='xxx'))

res = pd.to_numeric(idx.values)
tm.assert_numpy_array_equal(res, exp)

def test_datetimelike(self):
    """Datetime input (naive and tz-aware) converts to its int64 epoch-ns values."""
    for tz in (None, 'US/Eastern', 'Asia/Tokyo'):
        idx = pd.date_range('20130101', periods=3, tz=tz, name='xxx')

        tm.assert_index_equal(pd.to_numeric(idx),
                              pd.Index(idx.asi8, name='xxx'))
        tm.assert_series_equal(pd.to_numeric(pd.Series(idx, name='xxx')),
                               pd.Series(idx.asi8, name='xxx'))
        tm.assert_numpy_array_equal(pd.to_numeric(idx.values), idx.asi8)

def test_timedelta(self):
    """Timedelta input converts to its int64 nanosecond representation."""
    idx = pd.timedelta_range('1 days', periods=3, freq='D', name='xxx')

    tm.assert_index_equal(pd.to_numeric(idx),
                          pd.Index(idx.asi8, name='xxx'))
    tm.assert_series_equal(pd.to_numeric(pd.Series(idx, name='xxx')),
                           pd.Series(idx.asi8, name='xxx'))
    tm.assert_numpy_array_equal(pd.to_numeric(idx.values), idx.asi8)

def test_period(self):
idx = pd.period_range('2011-01', periods=3, freq='M', name='xxx')
res = pd.to_numeric(idx)
tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx'))

# ToDo: enable when we can support native PeriodDtype
# res = pd.to_numeric(pd.Series(idx, name='xxx'))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Avoided to use is_period_array ATM. we can have faster impl when period dtype is added.

# tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx'))


if __name__ == '__main__':
    # Run this module's tests under nose: verbose, stop on first failure,
    # and drop into pdb on errors or failures.
    nose_args = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_args, exit=False)
61 changes: 42 additions & 19 deletions pandas/tools/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,29 +78,52 @@ def to_numeric(arg, errors='raise'):
>>> pd.to_numeric(s, errors='ignore')
>>> pd.to_numeric(s, errors='coerce')
"""
is_series = False
is_index = False
is_scalar = False

index = name = None
if isinstance(arg, pd.Series):
index, name = arg.index, arg.name
is_series = True
values = arg.values
elif isinstance(arg, pd.Index):
is_index = True
values = arg.asi8
if values is None:
values = arg.values
elif isinstance(arg, (list, tuple)):
arg = np.array(arg, dtype='O')
values = np.array(arg, dtype='O')
elif np.isscalar(arg):
if com.is_number(arg):
return arg
is_scalar = True
values = np.array([arg], dtype='O')
elif getattr(arg, 'ndim', 1) > 1:
raise TypeError('arg must be a list, tuple, 1-d array, or Series')
else:
values = arg

conv = arg
arg = com._ensure_object(arg)

coerce_numeric = False if errors in ('ignore', 'raise') else True

try:
conv = lib.maybe_convert_numeric(arg,
set(),
coerce_numeric=coerce_numeric)
except:
if errors == 'raise':
raise

if index is not None:
return pd.Series(conv, index=index, name=name)
if com.is_numeric_dtype(values):
pass
elif com.is_datetime_or_timedelta_dtype(values):
values = values.astype(np.int64)
else:
values = com._ensure_object(values)
coerce_numeric = False if errors in ('ignore', 'raise') else True

try:
values = lib.maybe_convert_numeric(values, set(),
coerce_numeric=coerce_numeric)
except:
if errors == 'raise':
raise

if is_series:
return pd.Series(values, index=arg.index, name=arg.name)
elif is_index:
# because we want to coerce to numeric if possible,
# do not use _shallow_copy_with_infer
return Index(values, name=arg.name)
elif is_scalar:
return values[0]
else:
return conv
return values