Skip to content

Commit 376cef5

Browse files
committed
move to_numeric
1 parent cc6e059 commit 376cef5

File tree

13 files changed

+620
-2698
lines changed

13 files changed

+620
-2698
lines changed

doc/source/whatsnew/v0.20.0.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -1335,7 +1335,7 @@ If indicated, a deprecation warning will be issued if you reference these modul
13351335

13361336
.. csv-table::
13371337
:header: "Previous Location", "New Location", "Deprecated"
1338-
:widths: 30, 30, 20
1338+
:widths: 30, 30, 4
13391339

13401340
"pandas.lib", "pandas._libs.lib", "X"
13411341
"pandas.tslib", "pandas._libs.tslib", "X"
@@ -1349,7 +1349,7 @@ If indicated, a deprecation warning will be issued if you reference these modul
13491349
"pandas.parser", "pandas.io.libparsers", "X"
13501350
"pandas.formats", "pandas.io.formats", ""
13511351
"pandas.sparse", "pandas.core.sparse", ""
1352-
"pandas.tools", "pandas.core.tools", "pandas.tools.plotting"
1352+
"pandas.tools", "pandas.core.reshape", ""
13531353
"pandas.types", "pandas.core.dtypes", ""
13541354
"pandas.io.sas.saslib", "pandas.io.sas.libsas", ""
13551355
"pandas._join", "pandas._libs.join", ""

pandas/__init__.py

-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
'pandas.scatter_matrix', pandas.plotting.scatter_matrix,
5555
'pandas.plotting.scatter_matrix')
5656

57-
from pandas.core.reshape.util import to_numeric
5857
from pandas.util.print_versions import show_versions
5958
from pandas.io.api import *
6059
from pandas.util._tester import test

pandas/core/api.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,12 @@
1818
from pandas.core.frame import DataFrame
1919
from pandas.core.panel import Panel, WidePanel
2020
from pandas.core.panel4d import Panel4D
21-
from pandas.core.reshape.reshape import (pivot_simple as pivot, get_dummies,
22-
lreshape, wide_to_long)
21+
from pandas.core.reshape.reshape import (
22+
pivot_simple as pivot, get_dummies,
23+
lreshape, wide_to_long)
2324

2425
from pandas.core.indexing import IndexSlice
26+
from pandas.core.dtypes.cast import to_numeric
2527
from pandas.tseries.offsets import DateOffset
2628
from pandas.tseries.tools import to_datetime
2729
from pandas.tseries.index import (DatetimeIndex, Timestamp,

pandas/core/dtypes/cast.py

+163-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import numpy as np
66
import warnings
77

8+
import pandas as pd
89
from pandas._libs import tslib, lib
910
from pandas._libs.tslib import iNaT
1011
from pandas.compat import string_types, text_type, PY3
@@ -18,14 +19,17 @@
1819
is_integer_dtype,
1920
is_datetime_or_timedelta_dtype,
2021
is_bool_dtype, is_scalar,
22+
is_numeric_dtype, is_decimal,
23+
is_number,
2124
_string_dtypes,
2225
_coerce_to_dtype,
2326
_ensure_int8, _ensure_int16,
2427
_ensure_int32, _ensure_int64,
2528
_NS_DTYPE, _TD_DTYPE, _INT64_DTYPE,
2629
_POSSIBLY_CAST_DTYPES)
2730
from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype
28-
from .generic import ABCDatetimeIndex, ABCPeriodIndex, ABCSeries
31+
from .generic import (ABCDatetimeIndex, ABCPeriodIndex,
32+
ABCSeries, ABCIndexClass)
2933
from .missing import isnull, notnull
3034
from .inference import is_list_like
3135

@@ -1025,3 +1029,161 @@ def find_common_type(types):
10251029
return np.object
10261030

10271031
return np.find_common_type(types, [])
1032+
1033+
1034+
def to_numeric(arg, errors='raise', downcast=None):
    """
    Convert argument to a numeric type.

    Parameters
    ----------
    arg : scalar, list, tuple, 1-d array, Index, or Series
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input
    downcast : {'integer', 'signed', 'unsigned', 'float'}, default None
        If not None, and if the data has been successfully cast to a
        numerical dtype (or if the data was numeric to begin with),
        downcast that resulting data to the smallest numerical dtype
        possible according to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
        - 'float': smallest float dtype (min.: np.float32)

        As this behaviour is separate from the core conversion to
        numeric values, any errors raised during the downcasting
        will be surfaced regardless of the value of the 'errors' input.

        In addition, only dtypes whose size does not exceed the size
        of the resulting data's dtype are tried, so if none of the
        dtypes checked satisfy that specification, no downcasting
        will be performed on the data.

        .. versionadded:: 0.19.0

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input. Series if Series, Index if Index,
        scalar if scalar, otherwise ndarray

    Raises
    ------
    ValueError
        If `downcast` is not one of the accepted values.
    TypeError
        If `arg` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> import pandas as pd
    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    0    apple
    1      1.0
    2        2
    3       -3
    dtype: object
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64
    """
    if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'):
        raise ValueError('invalid downcasting method provided')

    # remember the kind of input so the result can be re-wrapped to match
    is_series = False
    is_index = False
    is_scalars = False

    if isinstance(arg, ABCSeries):
        is_series = True
        values = arg.values
    elif isinstance(arg, ABCIndexClass):
        is_index = True
        # use the asi8 view when the index provides one, else the values
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype='O')
    elif is_scalar(arg):
        # numeric scalars are returned directly; no downcast is applied
        if is_decimal(arg):
            return float(arg)
        if is_number(arg):
            return arg
        is_scalars = True
        values = np.array([arg], dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    else:
        values = arg

    try:
        if is_numeric_dtype(values):
            pass
        elif is_datetime_or_timedelta_dtype(values):
            values = values.astype(np.int64)
        else:
            values = _ensure_object(values)
            coerce_numeric = errors not in ('ignore', 'raise')
            values = lib.maybe_convert_numeric(values, set(),
                                               coerce_numeric=coerce_numeric)

    except Exception:
        # for 'ignore' / 'coerce' the values gathered so far are returned
        if errors == 'raise':
            raise

    # attempt downcast only if the data has been successfully converted
    # to a numerical dtype and if a downcast method has been specified
    if downcast is not None and is_numeric_dtype(values):
        typecodes = None

        if downcast in ('integer', 'signed'):
            typecodes = np.typecodes['Integer']
        elif downcast == 'unsigned' and (not len(values) or
                                         np.min(values) >= 0):
            # np.min raises ValueError on a zero-size array, so empty
            # input is treated as trivially non-negative
            typecodes = np.typecodes['UnsignedInteger']
        elif downcast == 'float':
            typecodes = np.typecodes['Float']

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            typecodes = typecodes[float_32_ind:]

        if typecodes is not None:
            # from smallest to largest
            for dtype in typecodes:
                if np.dtype(dtype).itemsize <= values.dtype.itemsize:
                    values = maybe_downcast_to_dtype(values, dtype)

                    # successful conversion
                    if values.dtype == dtype:
                        break

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return pd.Index(values, name=arg.name)
    elif is_scalars:
        return values[0]
    else:
        return values

pandas/core/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3509,7 +3509,7 @@ def _decide_output_index(self, output, labels):
35093509

35103510
def _wrap_applied_output(self, keys, values, not_indexed_same=False):
35113511
from pandas.core.index import _all_indexes_same
3512-
from pandas.core.reshape.util import to_numeric
3512+
from pandas.core.dtypes.cast import to_numeric
35133513

35143514
if len(keys) == 0:
35153515
return DataFrame(index=keys)

pandas/core/reshape/api.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
# flake8: noqa
2+
13
from pandas.core.reshape.concat import concat
24
from pandas.core.reshape.reshape import melt
3-
from pandas.core.reshape.merge import merge, ordered_merge, merge_ordered, merge_asof
5+
from pandas.core.reshape.merge import (
6+
merge, ordered_merge, merge_ordered, merge_asof)
47
from pandas.core.reshape.pivot import pivot_table, crosstab
58
from pandas.core.reshape.tile import cut, qcut

pandas/core/reshape/merge.py

-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from pandas.compat import range, lzip, zip, map, filter
1111
import pandas.compat as compat
1212

13-
import pandas as pd
1413
from pandas import (Categorical, Series, DataFrame,
1514
Index, MultiIndex, Timedelta)
1615
from pandas.core.frame import _merge_doc

0 commit comments

Comments
 (0)