Skip to content

Commit d16cce8

Browse files
committed
CLN: move/reorg pandas.tools -> pandas.core.reshape
xref pandas-dev#13634

Author: Jeff Reback <[email protected]>

Closes pandas-dev#16032 from jreback/move_tools and squashes the following commits:

376cef5 [Jeff Reback] move to_numeric
cc6e059 [Jeff Reback] CLN: move/reorg pandas.tools -> pandas.core.reshape
1 parent 0ba305b commit d16cce8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+2281
-4355
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1353,6 +1353,7 @@ If indicated, a deprecation warning will be issued if you reference these modules
13531353
"pandas.parser", "pandas.io.libparsers", "X"
13541354
"pandas.formats", "pandas.io.formats", ""
13551355
"pandas.sparse", "pandas.core.sparse", ""
1356+
"pandas.tools", "pandas.core.reshape", ""
13561357
"pandas.types", "pandas.core.dtypes", ""
13571358
"pandas.io.sas.saslib", "pandas.io.sas.libsas", ""
13581359
"pandas._join", "pandas._libs.join", ""

pandas/__init__.py

+1-8
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,7 @@
4444
from pandas.stats.api import *
4545
from pandas.tseries.api import *
4646
from pandas.core.computation.api import *
47-
48-
from pandas.tools.concat import concat
49-
from pandas.tools.merge import (merge, ordered_merge,
50-
merge_ordered, merge_asof)
51-
from pandas.tools.pivot import pivot_table, crosstab
47+
from pandas.core.reshape.api import *
5248

5349
# deprecate tools.plotting, plot_params and scatter_matrix on the top namespace
5450
import pandas.tools.plotting
@@ -58,9 +54,6 @@
5854
'pandas.scatter_matrix', pandas.plotting.scatter_matrix,
5955
'pandas.plotting.scatter_matrix')
6056

61-
from pandas.tools.tile import cut, qcut
62-
from pandas.tools.util import to_numeric
63-
from pandas.core.reshape import melt
6457
from pandas.util.print_versions import show_versions
6558
from pandas.io.api import *
6659
from pandas.util._tester import test

pandas/core/algorithms.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -605,7 +605,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
605605

606606
if bins is not None:
607607
try:
608-
from pandas.tools.tile import cut
608+
from pandas.core.reshape.tile import cut
609609
values = Series(values)
610610
ii = cut(values, bins, include_lowest=True)
611611
except TypeError:

pandas/core/api.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@
1818
from pandas.core.frame import DataFrame
1919
from pandas.core.panel import Panel, WidePanel
2020
from pandas.core.panel4d import Panel4D
21-
from pandas.core.reshape import (pivot_simple as pivot, get_dummies,
22-
lreshape, wide_to_long)
21+
from pandas.core.reshape.reshape import (
22+
pivot_simple as pivot, get_dummies,
23+
lreshape, wide_to_long)
2324

2425
from pandas.core.indexing import IndexSlice
26+
from pandas.core.dtypes.cast import to_numeric
2527
from pandas.tseries.offsets import DateOffset
2628
from pandas.tseries.tools import to_datetime
2729
from pandas.tseries.index import (DatetimeIndex, Timestamp,

pandas/core/base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -522,7 +522,7 @@ def nested_renaming_depr(level=4):
522522
len(obj.columns.intersection(keys)) != len(keys)):
523523
nested_renaming_depr()
524524

525-
from pandas.tools.concat import concat
525+
from pandas.core.reshape.concat import concat
526526

527527
def _agg_1dim(name, how, subset=None):
528528
"""
@@ -671,7 +671,7 @@ def is_any_frame():
671671
return result, True
672672

673673
def _aggregate_multiple_funcs(self, arg, _level, _axis):
674-
from pandas.tools.concat import concat
674+
from pandas.core.reshape.concat import concat
675675

676676
if _axis != 0:
677677
raise NotImplementedError("axis other than 0 is not supported")

pandas/core/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1995,7 +1995,7 @@ def describe(self):
19951995
counts = self.value_counts(dropna=False)
19961996
freqs = counts / float(counts.sum())
19971997

1998-
from pandas.tools.concat import concat
1998+
from pandas.core.reshape.concat import concat
19991999
result = concat([counts, freqs], axis=1)
20002000
result.columns = ['counts', 'freqs']
20012001
result.index.name = 'categories'

pandas/core/computation/expr.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from pandas.core.base import StringMixin
1414
from pandas.core import common as com
1515
import pandas.io.formats.printing as printing
16-
from pandas.tools.util import compose
16+
from pandas.core.reshape.util import compose
1717
from pandas.core.computation.ops import (
1818
_cmp_ops_syms, _bool_ops_syms,
1919
_arith_ops_syms, _unary_ops_syms, is_term)

pandas/core/dtypes/cast.py

+163-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import numpy as np
66
import warnings
77

8+
import pandas as pd
89
from pandas._libs import tslib, lib
910
from pandas._libs.tslib import iNaT
1011
from pandas.compat import string_types, text_type, PY3
@@ -18,14 +19,17 @@
1819
is_integer_dtype,
1920
is_datetime_or_timedelta_dtype,
2021
is_bool_dtype, is_scalar,
22+
is_numeric_dtype, is_decimal,
23+
is_number,
2124
_string_dtypes,
2225
_coerce_to_dtype,
2326
_ensure_int8, _ensure_int16,
2427
_ensure_int32, _ensure_int64,
2528
_NS_DTYPE, _TD_DTYPE, _INT64_DTYPE,
2629
_POSSIBLY_CAST_DTYPES)
2730
from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype
28-
from .generic import ABCDatetimeIndex, ABCPeriodIndex, ABCSeries
31+
from .generic import (ABCDatetimeIndex, ABCPeriodIndex,
32+
ABCSeries, ABCIndexClass)
2933
from .missing import isnull, notnull
3034
from .inference import is_list_like
3135

@@ -1025,3 +1029,161 @@ def find_common_type(types):
10251029
return np.object
10261030

10271031
return np.find_common_type(types, [])
1032+
1033+
1034+
def to_numeric(arg, errors='raise', downcast=None):
    """
    Convert argument to a numeric type.

    Parameters
    ----------
    arg : list, tuple, 1-d array, or Series
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input
    downcast : {'integer', 'signed', 'unsigned', 'float'} , default None
        If not None, and if the data has been successfully cast to a
        numerical dtype (or if the data was numeric to begin with),
        downcast that resulting data to the smallest numerical dtype
        possible according to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
        - 'float': smallest float dtype (min.: np.float32)

        As this behaviour is separate from the core conversion to
        numeric values, any errors raised during the downcasting
        will be surfaced regardless of the value of the 'errors' input.

        In addition, downcasting will only occur if the size
        of the resulting data's dtype is strictly larger than
        the dtype it is to be cast to, so if none of the dtypes
        checked satisfy that specification, no downcasting will be
        performed on the data.

        .. versionadded:: 0.19.0

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input.  Series if Series, otherwise ndarray

    Raises
    ------
    ValueError
        If `downcast` is not one of the accepted values.
    TypeError
        If `arg` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> import pandas as pd
    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    0    apple
    1      1.0
    2        2
    3       -3
    dtype: object
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64
    """
    if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'):
        raise ValueError('invalid downcasting method provided')

    # Remember what kind of container came in so the result can be
    # re-wrapped in the same kind on the way out.
    is_series = False
    is_index = False
    is_scalars = False

    if isinstance(arg, ABCSeries):
        is_series = True
        values = arg.values
    elif isinstance(arg, ABCIndexClass):
        is_index = True
        # prefer the int64 view when the index exposes one, otherwise
        # fall back to the plain values
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype='O')
    elif is_scalar(arg):
        # scalars that are already numeric short-circuit here
        if is_decimal(arg):
            return float(arg)
        if is_number(arg):
            return arg
        is_scalars = True
        values = np.array([arg], dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    else:
        values = arg

    try:
        if is_numeric_dtype(values):
            pass
        elif is_datetime_or_timedelta_dtype(values):
            values = values.astype(np.int64)
        else:
            values = _ensure_object(values)
            coerce_numeric = errors not in ('ignore', 'raise')
            values = lib.maybe_convert_numeric(values, set(),
                                               coerce_numeric=coerce_numeric)

    except Exception:
        # Deliberate best-effort: for 'ignore' / 'coerce' a failed parse
        # falls through and whatever `values` currently holds is returned.
        if errors == 'raise':
            raise

    # attempt downcast only if the data has been successfully converted
    # to a numerical dtype and if a downcast method has been specified
    if downcast is not None and is_numeric_dtype(values):
        typecodes = None

        if downcast in ('integer', 'signed'):
            typecodes = np.typecodes['Integer']
        elif downcast == 'unsigned' and (np.size(values) == 0 or
                                         np.min(values) >= 0):
            # np.min raises ValueError on a zero-size array, so check the
            # size first; empty data has no negative values and is
            # therefore eligible for unsigned downcasting.
            typecodes = np.typecodes['UnsignedInteger']
        elif downcast == 'float':
            typecodes = np.typecodes['Float']

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            typecodes = typecodes[float_32_ind:]

        if typecodes is not None:
            # from smallest to largest
            for dtype in typecodes:
                if np.dtype(dtype).itemsize <= values.dtype.itemsize:
                    values = maybe_downcast_to_dtype(values, dtype)

                    # successful conversion
                    if values.dtype == dtype:
                        break

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return pd.Index(values, name=arg.name)
    elif is_scalars:
        return values[0]
    else:
        return values

pandas/core/frame.py

+10-10
Original file line numberDiff line numberDiff line change
@@ -3956,7 +3956,7 @@ def pivot(self, index=None, columns=None, values=None):
39563956
39573957
39583958
"""
3959-
from pandas.core.reshape import pivot
3959+
from pandas.core.reshape.reshape import pivot
39603960
return pivot(self, index=index, columns=columns, values=values)
39613961

39623962
def stack(self, level=-1, dropna=True):
@@ -3992,7 +3992,7 @@ def stack(self, level=-1, dropna=True):
39923992
-------
39933993
stacked : DataFrame or Series
39943994
"""
3995-
from pandas.core.reshape import stack, stack_multiple
3995+
from pandas.core.reshape.reshape import stack, stack_multiple
39963996

39973997
if isinstance(level, (tuple, list)):
39983998
return stack_multiple(self, level, dropna=dropna)
@@ -4057,7 +4057,7 @@ def unstack(self, level=-1, fill_value=None):
40574057
-------
40584058
unstacked : DataFrame or Series
40594059
"""
4060-
from pandas.core.reshape import unstack
4060+
from pandas.core.reshape.reshape import unstack
40614061
return unstack(self, level, fill_value)
40624062

40634063
_shared_docs['melt'] = ("""
@@ -4159,7 +4159,7 @@ def unstack(self, level=-1, fill_value=None):
41594159
other='melt'))
41604160
def melt(self, id_vars=None, value_vars=None, var_name=None,
41614161
value_name='value', col_level=None):
4162-
from pandas.core.reshape import melt
4162+
from pandas.core.reshape.reshape import melt
41634163
return melt(self, id_vars=id_vars, value_vars=value_vars,
41644164
var_name=var_name, value_name=value_name,
41654165
col_level=col_level)
@@ -4609,7 +4609,7 @@ def append(self, other, ignore_index=False, verify_integrity=False):
46094609
if (self.columns.get_indexer(other.columns) >= 0).all():
46104610
other = other.loc[:, self.columns]
46114611

4612-
from pandas.tools.concat import concat
4612+
from pandas.core.reshape.concat import concat
46134613
if isinstance(other, (list, tuple)):
46144614
to_concat = [self] + other
46154615
else:
@@ -4741,8 +4741,8 @@ def join(self, other, on=None, how='left', lsuffix='', rsuffix='',
47414741

47424742
def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
47434743
sort=False):
4744-
from pandas.tools.merge import merge
4745-
from pandas.tools.concat import concat
4744+
from pandas.core.reshape.merge import merge
4745+
from pandas.core.reshape.concat import concat
47464746

47474747
if isinstance(other, Series):
47484748
if other.name is None:
@@ -4786,7 +4786,7 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
47864786
def merge(self, right, how='inner', on=None, left_on=None, right_on=None,
47874787
left_index=False, right_index=False, sort=False,
47884788
suffixes=('_x', '_y'), copy=True, indicator=False):
4789-
from pandas.tools.merge import merge
4789+
from pandas.core.reshape.merge import merge
47904790
return merge(self, right, how=how, on=on, left_on=left_on,
47914791
right_on=right_on, left_index=left_index,
47924792
right_index=right_index, sort=sort, suffixes=suffixes,
@@ -4846,7 +4846,7 @@ def round(self, decimals=0, *args, **kwargs):
48464846
Series.round
48474847
48484848
"""
4849-
from pandas.tools.concat import concat
4849+
from pandas.core.reshape.concat import concat
48504850

48514851
def _dict_round(df, decimals):
48524852
for col, vals in df.iteritems():
@@ -5523,7 +5523,7 @@ def isin(self, values):
55235523
"""
55245524
if isinstance(values, dict):
55255525
from collections import defaultdict
5526-
from pandas.tools.concat import concat
5526+
from pandas.core.reshape.concat import concat
55275527
values = defaultdict(list, values)
55285528
return concat((self.iloc[:, [i]].isin(values[col])
55295529
for i, col in enumerate(self.columns)), axis=1)

0 commit comments

Comments
 (0)