pandas-dev · jreback · Apr 17, 2017 · Apr 17, 2017
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -1349,6 +1349,7 @@ If indicated, a deprecation warning will be issued if you reference theses modul
     "pandas.parser", "pandas.io.libparsers", "X"
     "pandas.formats", "pandas.io.formats", ""
     "pandas.sparse", "pandas.core.sparse", ""
+    "pandas.tools", "pandas.core.reshape", ""
     "pandas.types", "pandas.core.dtypes", ""
     "pandas.io.sas.saslib", "pandas.io.sas.libsas", ""
     "pandas._join", "pandas._libs.join", ""

diff --git a/pandas/__init__.py b/pandas/__init__.py
@@ -44,11 +44,7 @@
 from pandas.stats.api import *
 from pandas.tseries.api import *
 from pandas.core.computation.api import *
-
-from pandas.tools.concat import concat
-from pandas.tools.merge import (merge, ordered_merge,
-                                merge_ordered, merge_asof)
-from pandas.tools.pivot import pivot_table, crosstab
+from pandas.core.reshape.api import *
 
 # deprecate tools.plotting, plot_params and scatter_matrix on the top namespace
 import pandas.tools.plotting
@@ -58,9 +54,6 @@
     'pandas.scatter_matrix', pandas.plotting.scatter_matrix,
     'pandas.plotting.scatter_matrix')
 
-from pandas.tools.tile import cut, qcut
-from pandas.tools.util import to_numeric
-from pandas.core.reshape import melt
 from pandas.util.print_versions import show_versions
 from pandas.io.api import *
 from pandas.util._tester import test

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -605,7 +605,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
 
     if bins is not None:
         try:
-            from pandas.tools.tile import cut
+            from pandas.core.reshape.tile import cut
             values = Series(values)
             ii = cut(values, bins, include_lowest=True)
         except TypeError:

diff --git a/pandas/core/api.py b/pandas/core/api.py
@@ -18,10 +18,12 @@
 from pandas.core.frame import DataFrame
 from pandas.core.panel import Panel, WidePanel
 from pandas.core.panel4d import Panel4D
-from pandas.core.reshape import (pivot_simple as pivot, get_dummies,
-                                 lreshape, wide_to_long)
+from pandas.core.reshape.reshape import (
+    pivot_simple as pivot, get_dummies,
+    lreshape, wide_to_long)
 
 from pandas.core.indexing import IndexSlice
+from pandas.core.dtypes.cast import to_numeric
 from pandas.tseries.offsets import DateOffset
 from pandas.tseries.tools import to_datetime
 from pandas.tseries.index import (DatetimeIndex, Timestamp,

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -522,7 +522,7 @@ def nested_renaming_depr(level=4):
                         len(obj.columns.intersection(keys)) != len(keys)):
                     nested_renaming_depr()
 
-            from pandas.tools.concat import concat
+            from pandas.core.reshape.concat import concat
 
             def _agg_1dim(name, how, subset=None):
                 """
@@ -671,7 +671,7 @@ def is_any_frame():
         return result, True
 
     def _aggregate_multiple_funcs(self, arg, _level, _axis):
-        from pandas.tools.concat import concat
+        from pandas.core.reshape.concat import concat
 
         if _axis != 0:
             raise NotImplementedError("axis other than 0 is not supported")

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -1995,7 +1995,7 @@ def describe(self):
         counts = self.value_counts(dropna=False)
         freqs = counts / float(counts.sum())
 
-        from pandas.tools.concat import concat
+        from pandas.core.reshape.concat import concat
         result = concat([counts, freqs], axis=1)
         result.columns = ['counts', 'freqs']
         result.index.name = 'categories'

diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py
@@ -13,7 +13,7 @@
 from pandas.core.base import StringMixin
 from pandas.core import common as com
 import pandas.io.formats.printing as printing
-from pandas.tools.util import compose
+from pandas.core.reshape.util import compose
 from pandas.core.computation.ops import (
     _cmp_ops_syms, _bool_ops_syms,
     _arith_ops_syms, _unary_ops_syms, is_term)

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -5,6 +5,7 @@
 import numpy as np
 import warnings
 
+import pandas as pd
 from pandas._libs import tslib, lib
 from pandas._libs.tslib import iNaT
 from pandas.compat import string_types, text_type, PY3
@@ -18,14 +19,17 @@
                      is_integer_dtype,
                      is_datetime_or_timedelta_dtype,
                      is_bool_dtype, is_scalar,
+                     is_numeric_dtype, is_decimal,
+                     is_number,
                      _string_dtypes,
                      _coerce_to_dtype,
                      _ensure_int8, _ensure_int16,
                      _ensure_int32, _ensure_int64,
                      _NS_DTYPE, _TD_DTYPE, _INT64_DTYPE,
                      _POSSIBLY_CAST_DTYPES)
 from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype
-from .generic import ABCDatetimeIndex, ABCPeriodIndex, ABCSeries
+from .generic import (ABCDatetimeIndex, ABCPeriodIndex,
+                      ABCSeries, ABCIndexClass)
 from .missing import isnull, notnull
 from .inference import is_list_like
 
@@ -1025,3 +1029,177 @@ def find_common_type(types):
             return np.object
 
     return np.find_common_type(types, [])
+
+
+def to_numeric(arg, errors='raise', downcast=None):
+    """
+    Convert argument to a numeric type.
+
+    Parameters
+    ----------
+    arg : scalar, list, tuple, 1-d array, Index, or Series
+    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
+        - If 'raise', then invalid parsing will raise an exception
+        - If 'coerce', then invalid parsing will be set as NaN
+        - If 'ignore', then invalid parsing will return the input
+    downcast : {'integer', 'signed', 'unsigned', 'float'} , default None
+        If not None, and if the data has been successfully cast to a
+        numerical dtype (or if the data was numeric to begin with),
+        downcast that resulting data to the smallest numerical dtype
+        possible according to the following rules:
+
+        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
+        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
+        - 'float': smallest float dtype (min.: np.float32)
+
+        As this behaviour is separate from the core conversion to
+        numeric values, any errors raised during the downcasting
+        will be surfaced regardless of the value of the 'errors' input.
+
+        In addition, downcasting will only occur if the size
+        of the resulting data's dtype is strictly larger than
+        the dtype it is to be cast to, so if none of the dtypes
+        checked satisfy that specification, no downcasting will be
+        performed on the data.
+
+        .. versionadded:: 0.19.0
+
+    Returns
+    -------
+    ret : numeric if parsing succeeded. Return type depends on input:
+        Series if Series, Index if Index, scalar if scalar, else ndarray
+
+    Raises
+    ------
+    ValueError
+        If `downcast` is not one of the accepted values.
+    TypeError
+        If `arg` is an array-like with more than one dimension.
+
+    Examples
+    --------
+    Take separate series and convert to numeric, coercing when told to
+
+    >>> import pandas as pd
+    >>> s = pd.Series(['1.0', '2', -3])
+    >>> pd.to_numeric(s)
+    0    1.0
+    1    2.0
+    2   -3.0
+    dtype: float64
+    >>> pd.to_numeric(s, downcast='float')
+    0    1.0
+    1    2.0
+    2   -3.0
+    dtype: float32
+    >>> pd.to_numeric(s, downcast='signed')
+    0    1
+    1    2
+    2   -3
+    dtype: int8
+    >>> s = pd.Series(['apple', '1.0', '2', -3])
+    >>> pd.to_numeric(s, errors='ignore')
+    0    apple
+    1      1.0
+    2        2
+    3       -3
+    dtype: object
+    >>> pd.to_numeric(s, errors='coerce')
+    0    NaN
+    1    1.0
+    2    2.0
+    3   -3.0
+    dtype: float64
+    """
+    if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'):
+        raise ValueError('invalid downcasting method provided')
+
+    # remember the kind of input so the result can be re-wrapped on return
+    is_series = False
+    is_index = False
+    is_scalars = False
+
+    if isinstance(arg, ABCSeries):
+        is_series = True
+        values = arg.values
+    elif isinstance(arg, ABCIndexClass):
+        is_index = True
+        # prefer the index's ``asi8`` representation; when it is None,
+        # fall back to the plain values
+        values = arg.asi8
+        if values is None:
+            values = arg.values
+    elif isinstance(arg, (list, tuple)):
+        values = np.array(arg, dtype='O')
+    elif is_scalar(arg):
+        # decimals become floats and numbers are returned as they are
+        if is_decimal(arg):
+            return float(arg)
+        if is_number(arg):
+            return arg
+        is_scalars = True
+        values = np.array([arg], dtype='O')
+    elif getattr(arg, 'ndim', 1) > 1:
+        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
+    else:
+        values = arg
+
+    try:
+        if is_numeric_dtype(values):
+            pass
+        elif is_datetime_or_timedelta_dtype(values):
+            values = values.astype(np.int64)
+        else:
+            values = _ensure_object(values)
+            coerce_numeric = errors not in ('ignore', 'raise')
+            values = lib.maybe_convert_numeric(values, set(),
+                                               coerce_numeric=coerce_numeric)
+
+    except Exception:
+        # any errors mode other than 'raise' swallows the failure; ``values``
+        # keeps whatever it held when the conversion failed
+        if errors == 'raise':
+            raise
+
+    # attempt downcast only if the data has been successfully converted
+    # to a numerical dtype and if a downcast method has been specified
+    if downcast is not None and is_numeric_dtype(values):
+        typecodes = None
+
+        # np.min raises on zero-size input, so empty data is treated as
+        # trivially non-negative for the 'unsigned' check
+        if downcast in ('integer', 'signed'):
+            typecodes = np.typecodes['Integer']
+        elif downcast == 'unsigned' and (np.size(values) == 0 or
+                                         np.min(values) >= 0):
+            typecodes = np.typecodes['UnsignedInteger']
+        elif downcast == 'float':
+            typecodes = np.typecodes['Float']
+
+            # pandas support goes only to np.float32,
+            # as float dtypes smaller than that are
+            # extremely rare and not well supported
+            float_32_char = np.dtype(np.float32).char
+            float_32_ind = typecodes.index(float_32_char)
+            typecodes = typecodes[float_32_ind:]
+
+        if typecodes is not None:
+            # from smallest to largest
+            for dtype in typecodes:
+                if np.dtype(dtype).itemsize <= values.dtype.itemsize:
+                    values = maybe_downcast_to_dtype(values, dtype)
+
+                    # successful conversion
+                    if values.dtype == dtype:
+                        break
+
+    if is_series:
+        return pd.Series(values, index=arg.index, name=arg.name)
+    elif is_index:
+        # because we want to coerce to numeric if possible,
+        # do not use _shallow_copy_with_infer
+        return pd.Index(values, name=arg.name)
+    elif is_scalars:
+        return values[0]
+    else:
+        return values
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3956,7 +3956,7 @@ def pivot(self, index=None, columns=None, values=None):
 
 
         """
-        from pandas.core.reshape import pivot
+        from pandas.core.reshape.reshape import pivot
         return pivot(self, index=index, columns=columns, values=values)
 
     def stack(self, level=-1, dropna=True):
@@ -3992,7 +3992,7 @@ def stack(self, level=-1, dropna=True):
         -------
         stacked : DataFrame or Series
         """
-        from pandas.core.reshape import stack, stack_multiple
+        from pandas.core.reshape.reshape import stack, stack_multiple
 
         if isinstance(level, (tuple, list)):
             return stack_multiple(self, level, dropna=dropna)
@@ -4057,7 +4057,7 @@ def unstack(self, level=-1, fill_value=None):
         -------
         unstacked : DataFrame or Series
         """
-        from pandas.core.reshape import unstack
+        from pandas.core.reshape.reshape import unstack
         return unstack(self, level, fill_value)
 
     _shared_docs['melt'] = ("""
@@ -4159,7 +4159,7 @@ def unstack(self, level=-1, fill_value=None):
                    other='melt'))
     def melt(self, id_vars=None, value_vars=None, var_name=None,
              value_name='value', col_level=None):
-        from pandas.core.reshape import melt
+        from pandas.core.reshape.reshape import melt
         return melt(self, id_vars=id_vars, value_vars=value_vars,
                     var_name=var_name, value_name=value_name,
                     col_level=col_level)
@@ -4609,7 +4609,7 @@ def append(self, other, ignore_index=False, verify_integrity=False):
             if (self.columns.get_indexer(other.columns) >= 0).all():
                 other = other.loc[:, self.columns]
 
-        from pandas.tools.concat import concat
+        from pandas.core.reshape.concat import concat
         if isinstance(other, (list, tuple)):
             to_concat = [self] + other
         else:
@@ -4741,8 +4741,8 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='',
 
     def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
                      sort=False):
-        from pandas.tools.merge import merge
-        from pandas.tools.concat import concat
+        from pandas.core.reshape.merge import merge
+        from pandas.core.reshape.concat import concat
 
         if isinstance(other, Series):
             if other.name is None:
@@ -4786,7 +4786,7 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
     def merge(self, right, how='inner', on=None, left_on=None, right_on=None,
               left_index=False, right_index=False, sort=False,
               suffixes=('_x', '_y'), copy=True, indicator=False):
-        from pandas.tools.merge import merge
+        from pandas.core.reshape.merge import merge
         return merge(self, right, how=how, on=on, left_on=left_on,
                      right_on=right_on, left_index=left_index,
                      right_index=right_index, sort=sort, suffixes=suffixes,
@@ -4846,7 +4846,7 @@ def round(self, decimals=0, *args, **kwargs):
         Series.round
 
         """
-        from pandas.tools.concat import concat
+        from pandas.core.reshape.concat import concat
 
         def _dict_round(df, decimals):
             for col, vals in df.iteritems():
@@ -5523,7 +5523,7 @@ def isin(self, values):
         """
         if isinstance(values, dict):
             from collections import defaultdict
-            from pandas.tools.concat import concat
+            from pandas.core.reshape.concat import concat
             values = defaultdict(list, values)
             return concat((self.iloc[:, [i]].isin(values[col])
                            for i, col in enumerate(self.columns)), axis=1)