Skip to content

Commit b8be12c

Browse files
committed
move to_numeric to pandas.core.tools.numeric
1 parent 8339980 commit b8be12c

File tree

7 files changed

+545
-531
lines changed

7 files changed

+545
-531
lines changed

pandas/core/api.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
lreshape, wide_to_long)
2929

3030
from pandas.core.indexing import IndexSlice
31-
from pandas.core.dtypes.cast import to_numeric
31+
from pandas.core.tools.numeric import to_numeric
3232
from pandas.tseries.offsets import DateOffset
3333
from pandas.core.tools.datetimes import to_datetime
3434
from pandas.core.tools.timedeltas import to_timedelta

pandas/core/dtypes/cast.py

+1-162
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import numpy as np
66
import warnings
77

8-
import pandas as pd
98
from pandas._libs import tslib, lib
109
from pandas._libs.tslib import iNaT
1110
from pandas.compat import string_types, text_type, PY3
@@ -19,8 +18,6 @@
1918
is_integer_dtype,
2019
is_datetime_or_timedelta_dtype,
2120
is_bool_dtype, is_scalar,
22-
is_numeric_dtype, is_decimal,
23-
is_number,
2421
_string_dtypes,
2522
_coerce_to_dtype,
2623
_ensure_int8, _ensure_int16,
@@ -29,7 +26,7 @@
2926
_POSSIBLY_CAST_DTYPES)
3027
from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype
3128
from .generic import (ABCDatetimeIndex, ABCPeriodIndex,
32-
ABCSeries, ABCIndexClass)
29+
ABCSeries)
3330
from .missing import isnull, notnull
3431
from .inference import is_list_like
3532

@@ -1029,161 +1026,3 @@ def find_common_type(types):
10291026
return np.object
10301027

10311028
return np.find_common_type(types, [])
1032-
1033-
1034-
def to_numeric(arg, errors='raise', downcast=None):
1035-
"""
1036-
Convert argument to a numeric type.
1037-
1038-
Parameters
1039-
----------
1040-
arg : list, tuple, 1-d array, or Series
1041-
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
1042-
- If 'raise', then invalid parsing will raise an exception
1043-
- If 'coerce', then invalid parsing will be set as NaN
1044-
- If 'ignore', then invalid parsing will return the input
1045-
downcast : {'integer', 'signed', 'unsigned', 'float'} , default None
1046-
If not None, and if the data has been successfully cast to a
1047-
numerical dtype (or if the data was numeric to begin with),
1048-
downcast that resulting data to the smallest numerical dtype
1049-
possible according to the following rules:
1050-
1051-
- 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
1052-
- 'unsigned': smallest unsigned int dtype (min.: np.uint8)
1053-
- 'float': smallest float dtype (min.: np.float32)
1054-
1055-
As this behaviour is separate from the core conversion to
1056-
numeric values, any errors raised during the downcasting
1057-
will be surfaced regardless of the value of the 'errors' input.
1058-
1059-
In addition, downcasting will only occur if the size
1060-
of the resulting data's dtype is strictly larger than
1061-
the dtype it is to be cast to, so if none of the dtypes
1062-
checked satisfy that specification, no downcasting will be
1063-
performed on the data.
1064-
1065-
.. versionadded:: 0.19.0
1066-
1067-
Returns
1068-
-------
1069-
ret : numeric if parsing succeeded.
1070-
Return type depends on input. Series if Series, otherwise ndarray
1071-
1072-
Examples
1073-
--------
1074-
Take separate series and convert to numeric, coercing when told to
1075-
1076-
>>> import pandas as pd
1077-
>>> s = pd.Series(['1.0', '2', -3])
1078-
>>> pd.to_numeric(s)
1079-
0 1.0
1080-
1 2.0
1081-
2 -3.0
1082-
dtype: float64
1083-
>>> pd.to_numeric(s, downcast='float')
1084-
0 1.0
1085-
1 2.0
1086-
2 -3.0
1087-
dtype: float32
1088-
>>> pd.to_numeric(s, downcast='signed')
1089-
0 1
1090-
1 2
1091-
2 -3
1092-
dtype: int8
1093-
>>> s = pd.Series(['apple', '1.0', '2', -3])
1094-
>>> pd.to_numeric(s, errors='ignore')
1095-
0 apple
1096-
1 1.0
1097-
2 2
1098-
3 -3
1099-
dtype: object
1100-
>>> pd.to_numeric(s, errors='coerce')
1101-
0 NaN
1102-
1 1.0
1103-
2 2.0
1104-
3 -3.0
1105-
dtype: float64
1106-
"""
1107-
if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'):
1108-
raise ValueError('invalid downcasting method provided')
1109-
1110-
is_series = False
1111-
is_index = False
1112-
is_scalars = False
1113-
1114-
if isinstance(arg, ABCSeries):
1115-
is_series = True
1116-
values = arg.values
1117-
elif isinstance(arg, ABCIndexClass):
1118-
is_index = True
1119-
values = arg.asi8
1120-
if values is None:
1121-
values = arg.values
1122-
elif isinstance(arg, (list, tuple)):
1123-
values = np.array(arg, dtype='O')
1124-
elif is_scalar(arg):
1125-
if is_decimal(arg):
1126-
return float(arg)
1127-
if is_number(arg):
1128-
return arg
1129-
is_scalars = True
1130-
values = np.array([arg], dtype='O')
1131-
elif getattr(arg, 'ndim', 1) > 1:
1132-
raise TypeError('arg must be a list, tuple, 1-d array, or Series')
1133-
else:
1134-
values = arg
1135-
1136-
try:
1137-
if is_numeric_dtype(values):
1138-
pass
1139-
elif is_datetime_or_timedelta_dtype(values):
1140-
values = values.astype(np.int64)
1141-
else:
1142-
values = _ensure_object(values)
1143-
coerce_numeric = False if errors in ('ignore', 'raise') else True
1144-
values = lib.maybe_convert_numeric(values, set(),
1145-
coerce_numeric=coerce_numeric)
1146-
1147-
except Exception:
1148-
if errors == 'raise':
1149-
raise
1150-
1151-
# attempt downcast only if the data has been successfully converted
1152-
# to a numerical dtype and if a downcast method has been specified
1153-
if downcast is not None and is_numeric_dtype(values):
1154-
typecodes = None
1155-
1156-
if downcast in ('integer', 'signed'):
1157-
typecodes = np.typecodes['Integer']
1158-
elif downcast == 'unsigned' and np.min(values) >= 0:
1159-
typecodes = np.typecodes['UnsignedInteger']
1160-
elif downcast == 'float':
1161-
typecodes = np.typecodes['Float']
1162-
1163-
# pandas support goes only to np.float32,
1164-
# as float dtypes smaller than that are
1165-
# extremely rare and not well supported
1166-
float_32_char = np.dtype(np.float32).char
1167-
float_32_ind = typecodes.index(float_32_char)
1168-
typecodes = typecodes[float_32_ind:]
1169-
1170-
if typecodes is not None:
1171-
# from smallest to largest
1172-
for dtype in typecodes:
1173-
if np.dtype(dtype).itemsize <= values.dtype.itemsize:
1174-
values = maybe_downcast_to_dtype(values, dtype)
1175-
1176-
# successful conversion
1177-
if values.dtype == dtype:
1178-
break
1179-
1180-
if is_series:
1181-
return pd.Series(values, index=arg.index, name=arg.name)
1182-
elif is_index:
1183-
# because we want to coerce to numeric if possible,
1184-
# do not use _shallow_copy_with_infer
1185-
return pd.Index(values, name=arg.name)
1186-
elif is_scalars:
1187-
return values[0]
1188-
else:
1189-
return values

pandas/core/tools/numeric.py

+170
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
import numpy as np
2+
import pandas as pd
3+
from pandas.core.dtypes.common import (
4+
is_scalar,
5+
is_numeric_dtype,
6+
is_decimal,
7+
is_datetime_or_timedelta_dtype,
8+
is_number,
9+
_ensure_object)
10+
from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
11+
from pandas.core.dtypes.cast import maybe_downcast_to_dtype
12+
from pandas._libs import lib
13+
14+
15+
def to_numeric(arg, errors='raise', downcast=None):
    """
    Convert argument to a numeric type.

    Parameters
    ----------
    arg : scalar, list, tuple, 1-d array, Index, or Series
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input
    downcast : {'integer', 'signed', 'unsigned', 'float'} , default None
        If not None, and if the data has been successfully cast to a
        numerical dtype (or if the data was numeric to begin with),
        downcast that resulting data to the smallest numerical dtype
        possible according to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
        - 'float': smallest float dtype (min.: np.float32)

        As this behaviour is separate from the core conversion to
        numeric values, any errors raised during the downcasting
        will be surfaced regardless of the value of the 'errors' input.

        In addition, a candidate dtype is only tried if its size does
        not exceed the size of the data's current dtype, so if none of
        the dtypes checked satisfy that specification, no downcasting
        will be performed on the data.

        .. versionadded:: 0.19.0

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input: Series if Series, Index if Index,
        a scalar if scalar, otherwise ndarray.

    Raises
    ------
    ValueError
        If `downcast` or `errors` is not one of the accepted values.
    TypeError
        If `arg` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> import pandas as pd
    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    0    apple
    1      1.0
    2        2
    3       -3
    dtype: object
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64
    """
    if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'):
        raise ValueError('invalid downcasting method provided')

    # Reject unknown modes up front; otherwise any unrecognised string
    # would silently behave like 'coerce' further down.
    if errors not in ('ignore', 'raise', 'coerce'):
        raise ValueError('invalid error value specified')

    # Remember what kind of container came in so the result can be
    # wrapped back into the same kind on the way out.
    is_series = False
    is_index = False
    is_scalars = False

    if isinstance(arg, ABCSeries):
        is_series = True
        values = arg.values
    elif isinstance(arg, ABCIndexClass):
        is_index = True
        # prefer the asi8 view; fall back to .values when it is None
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype='O')
    elif is_scalar(arg):
        # scalars that are already numeric need no parsing at all
        if is_decimal(arg):
            return float(arg)
        if is_number(arg):
            return arg
        is_scalars = True
        values = np.array([arg], dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    else:
        values = arg

    try:
        if is_numeric_dtype(values):
            pass
        elif is_datetime_or_timedelta_dtype(values):
            values = values.astype(np.int64)
        else:
            values = _ensure_object(values)
            coerce_numeric = errors not in ('ignore', 'raise')
            values = lib.maybe_convert_numeric(values, set(),
                                               coerce_numeric=coerce_numeric)

    except Exception:
        # for 'ignore' / 'coerce' a failed conversion is not fatal:
        # `values` is left as it was before the failing step
        if errors == 'raise':
            raise

    # attempt downcast only if the data has been successfully converted
    # to a numerical dtype and if a downcast method has been specified
    if downcast is not None and is_numeric_dtype(values):
        typecodes = None

        if downcast in ('integer', 'signed'):
            typecodes = np.typecodes['Integer']
        elif downcast == 'unsigned' and (np.size(values) == 0 or
                                         np.min(values) >= 0):
            # np.min raises on zero-size input, so treat empty data as
            # trivially non-negative instead of calling it
            typecodes = np.typecodes['UnsignedInteger']
        elif downcast == 'float':
            typecodes = np.typecodes['Float']

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            typecodes = typecodes[float_32_ind:]

        if typecodes is not None:
            # from smallest to largest
            for dtype in typecodes:
                if np.dtype(dtype).itemsize <= values.dtype.itemsize:
                    values = maybe_downcast_to_dtype(values, dtype)

                    # successful conversion
                    if values.dtype == dtype:
                        break

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return pd.Index(values, name=arg.name)
    elif is_scalars:
        return values[0]
    else:
        return values

0 commit comments

Comments
 (0)