Skip to content

CLN: move/reorg pandas.tools -> pandas.core.reshape #16032

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1349,6 +1349,7 @@ If indicated, a deprecation warning will be issued if you reference these modul
"pandas.parser", "pandas.io.libparsers", "X"
"pandas.formats", "pandas.io.formats", ""
"pandas.sparse", "pandas.core.sparse", ""
"pandas.tools", "pandas.core.reshape", ""
"pandas.types", "pandas.core.dtypes", ""
"pandas.io.sas.saslib", "pandas.io.sas.libsas", ""
"pandas._join", "pandas._libs.join", ""
Expand Down
9 changes: 1 addition & 8 deletions pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,7 @@
from pandas.stats.api import *
from pandas.tseries.api import *
from pandas.core.computation.api import *

from pandas.tools.concat import concat
from pandas.tools.merge import (merge, ordered_merge,
merge_ordered, merge_asof)
from pandas.tools.pivot import pivot_table, crosstab
from pandas.core.reshape.api import *

# deprecate tools.plotting, plot_params and scatter_matrix on the top namespace
import pandas.tools.plotting
Expand All @@ -58,9 +54,6 @@
'pandas.scatter_matrix', pandas.plotting.scatter_matrix,
'pandas.plotting.scatter_matrix')

from pandas.tools.tile import cut, qcut
from pandas.tools.util import to_numeric
from pandas.core.reshape import melt
from pandas.util.print_versions import show_versions
from pandas.io.api import *
from pandas.util._tester import test
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,

if bins is not None:
try:
from pandas.tools.tile import cut
from pandas.core.reshape.tile import cut
values = Series(values)
ii = cut(values, bins, include_lowest=True)
except TypeError:
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
from pandas.core.frame import DataFrame
from pandas.core.panel import Panel, WidePanel
from pandas.core.panel4d import Panel4D
from pandas.core.reshape import (pivot_simple as pivot, get_dummies,
lreshape, wide_to_long)
from pandas.core.reshape.reshape import (
pivot_simple as pivot, get_dummies,
lreshape, wide_to_long)

from pandas.core.indexing import IndexSlice
from pandas.core.dtypes.cast import to_numeric
from pandas.tseries.offsets import DateOffset
from pandas.tseries.tools import to_datetime
from pandas.tseries.index import (DatetimeIndex, Timestamp,
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ def nested_renaming_depr(level=4):
len(obj.columns.intersection(keys)) != len(keys)):
nested_renaming_depr()

from pandas.tools.concat import concat
from pandas.core.reshape.concat import concat

def _agg_1dim(name, how, subset=None):
"""
Expand Down Expand Up @@ -671,7 +671,7 @@ def is_any_frame():
return result, True

def _aggregate_multiple_funcs(self, arg, _level, _axis):
from pandas.tools.concat import concat
from pandas.core.reshape.concat import concat

if _axis != 0:
raise NotImplementedError("axis other than 0 is not supported")
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1995,7 +1995,7 @@ def describe(self):
counts = self.value_counts(dropna=False)
freqs = counts / float(counts.sum())

from pandas.tools.concat import concat
from pandas.core.reshape.concat import concat
result = concat([counts, freqs], axis=1)
result.columns = ['counts', 'freqs']
result.index.name = 'categories'
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/computation/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pandas.core.base import StringMixin
from pandas.core import common as com
import pandas.io.formats.printing as printing
from pandas.tools.util import compose
from pandas.core.reshape.util import compose
from pandas.core.computation.ops import (
_cmp_ops_syms, _bool_ops_syms,
_arith_ops_syms, _unary_ops_syms, is_term)
Expand Down
164 changes: 163 additions & 1 deletion pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
import warnings

import pandas as pd
from pandas._libs import tslib, lib
from pandas._libs.tslib import iNaT
from pandas.compat import string_types, text_type, PY3
Expand All @@ -18,14 +19,17 @@
is_integer_dtype,
is_datetime_or_timedelta_dtype,
is_bool_dtype, is_scalar,
is_numeric_dtype, is_decimal,
is_number,
_string_dtypes,
_coerce_to_dtype,
_ensure_int8, _ensure_int16,
_ensure_int32, _ensure_int64,
_NS_DTYPE, _TD_DTYPE, _INT64_DTYPE,
_POSSIBLY_CAST_DTYPES)
from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype
from .generic import ABCDatetimeIndex, ABCPeriodIndex, ABCSeries
from .generic import (ABCDatetimeIndex, ABCPeriodIndex,
ABCSeries, ABCIndexClass)
from .missing import isnull, notnull
from .inference import is_list_like

Expand Down Expand Up @@ -1025,3 +1029,161 @@ def find_common_type(types):
return np.object

return np.find_common_type(types, [])


def to_numeric(arg, errors='raise', downcast=None):
    """
    Convert argument to a numeric type.

    Parameters
    ----------
    arg : list, tuple, 1-d array, Index, Series or scalar
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input
    downcast : {'integer', 'signed', 'unsigned', 'float'} , default None
        If not None, and if the data has been successfully cast to a
        numerical dtype (or if the data was numeric to begin with),
        downcast that resulting data to the smallest numerical dtype
        possible according to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
        - 'float': smallest float dtype (min.: np.float32)

        As this behaviour is separate from the core conversion to
        numeric values, any errors raised during the downcasting
        will be surfaced regardless of the value of the 'errors' input.

        In addition, downcasting will only occur if the size
        of the resulting data's dtype is strictly larger than
        the dtype it is to be cast to, so if none of the dtypes
        checked satisfy that specification, no downcasting will be
        performed on the data.

        'unsigned' is only attempted when the data holds no value below
        zero; empty data trivially satisfies that condition.

        .. versionadded:: 0.19.0

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input.  Series if Series, Index if Index,
        scalar if scalar, otherwise ndarray

    Raises
    ------
    ValueError
        If `downcast` is not one of the accepted values.
    TypeError
        If `arg` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> import pandas as pd
    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    0    apple
    1      1.0
    2        2
    3       -3
    dtype: object
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64
    """
    if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'):
        raise ValueError('invalid downcasting method provided')

    # Remember the kind of input so the result can be re-wrapped the same way.
    is_series = False
    is_index = False
    is_scalars = False

    if isinstance(arg, ABCSeries):
        is_series = True
        values = arg.values
    elif isinstance(arg, ABCIndexClass):
        is_index = True
        # Prefer the asi8 representation; fall back to .values when the
        # index does not provide one (asi8 is None).
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype='O')
    elif is_scalar(arg):
        # Scalars that are already numeric short-circuit without any parsing.
        if is_decimal(arg):
            return float(arg)
        if is_number(arg):
            return arg
        is_scalars = True
        values = np.array([arg], dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    else:
        values = arg

    try:
        if is_numeric_dtype(values):
            pass
        elif is_datetime_or_timedelta_dtype(values):
            values = values.astype(np.int64)
        else:
            values = _ensure_object(values)
            coerce_numeric = errors not in ('ignore', 'raise')
            values = lib.maybe_convert_numeric(values, set(),
                                               coerce_numeric=coerce_numeric)

    except Exception:
        # For 'ignore' / 'coerce' a failed conversion is deliberately
        # swallowed and whatever `values` currently holds is returned.
        if errors == 'raise':
            raise

    # attempt downcast only if the data has been successfully converted
    # to a numerical dtype and if a downcast method has been specified
    if downcast is not None and is_numeric_dtype(values):
        typecodes = None

        if downcast in ('integer', 'signed'):
            typecodes = np.typecodes['Integer']
        elif downcast == 'unsigned' and (not np.size(values) or
                                         np.min(values) >= 0):
            # np.min raises ValueError on zero-size input, so the minimum
            # is only consulted when there is at least one element.
            typecodes = np.typecodes['UnsignedInteger']
        elif downcast == 'float':
            typecodes = np.typecodes['Float']

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            typecodes = typecodes[float_32_ind:]

        if typecodes is not None:
            # from smallest to largest
            for dtype in typecodes:
                if np.dtype(dtype).itemsize <= values.dtype.itemsize:
                    values = maybe_downcast_to_dtype(values, dtype)

                    # successful conversion
                    if values.dtype == dtype:
                        break

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return pd.Index(values, name=arg.name)
    elif is_scalars:
        return values[0]
    else:
        return values
20 changes: 10 additions & 10 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3956,7 +3956,7 @@ def pivot(self, index=None, columns=None, values=None):


"""
from pandas.core.reshape import pivot
from pandas.core.reshape.reshape import pivot
return pivot(self, index=index, columns=columns, values=values)

def stack(self, level=-1, dropna=True):
Expand Down Expand Up @@ -3992,7 +3992,7 @@ def stack(self, level=-1, dropna=True):
-------
stacked : DataFrame or Series
"""
from pandas.core.reshape import stack, stack_multiple
from pandas.core.reshape.reshape import stack, stack_multiple

if isinstance(level, (tuple, list)):
return stack_multiple(self, level, dropna=dropna)
Expand Down Expand Up @@ -4057,7 +4057,7 @@ def unstack(self, level=-1, fill_value=None):
-------
unstacked : DataFrame or Series
"""
from pandas.core.reshape import unstack
from pandas.core.reshape.reshape import unstack
return unstack(self, level, fill_value)

_shared_docs['melt'] = ("""
Expand Down Expand Up @@ -4159,7 +4159,7 @@ def unstack(self, level=-1, fill_value=None):
other='melt'))
def melt(self, id_vars=None, value_vars=None, var_name=None,
value_name='value', col_level=None):
from pandas.core.reshape import melt
from pandas.core.reshape.reshape import melt
return melt(self, id_vars=id_vars, value_vars=value_vars,
var_name=var_name, value_name=value_name,
col_level=col_level)
Expand Down Expand Up @@ -4609,7 +4609,7 @@ def append(self, other, ignore_index=False, verify_integrity=False):
if (self.columns.get_indexer(other.columns) >= 0).all():
other = other.loc[:, self.columns]

from pandas.tools.concat import concat
from pandas.core.reshape.concat import concat
if isinstance(other, (list, tuple)):
to_concat = [self] + other
else:
Expand Down Expand Up @@ -4741,8 +4741,8 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='',

def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
sort=False):
from pandas.tools.merge import merge
from pandas.tools.concat import concat
from pandas.core.reshape.merge import merge
from pandas.core.reshape.concat import concat

if isinstance(other, Series):
if other.name is None:
Expand Down Expand Up @@ -4786,7 +4786,7 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
def merge(self, right, how='inner', on=None, left_on=None, right_on=None,
left_index=False, right_index=False, sort=False,
suffixes=('_x', '_y'), copy=True, indicator=False):
from pandas.tools.merge import merge
from pandas.core.reshape.merge import merge
return merge(self, right, how=how, on=on, left_on=left_on,
right_on=right_on, left_index=left_index,
right_index=right_index, sort=sort, suffixes=suffixes,
Expand Down Expand Up @@ -4846,7 +4846,7 @@ def round(self, decimals=0, *args, **kwargs):
Series.round

"""
from pandas.tools.concat import concat
from pandas.core.reshape.concat import concat

def _dict_round(df, decimals):
for col, vals in df.iteritems():
Expand Down Expand Up @@ -5523,7 +5523,7 @@ def isin(self, values):
"""
if isinstance(values, dict):
from collections import defaultdict
from pandas.tools.concat import concat
from pandas.core.reshape.concat import concat
values = defaultdict(list, values)
return concat((self.iloc[:, [i]].isin(values[col])
for i, col in enumerate(self.columns)), axis=1)
Expand Down
Loading