forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.py
129 lines (104 loc) · 3.54 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import numpy as np
import pandas.lib as lib
import pandas as pd
from pandas.compat import reduce
from pandas.core.index import Index
from pandas.core import common as com
def match(needles, haystack):
    """
    Look up the elements of ``needles`` in ``haystack``.

    Both arguments are wrapped in an ``Index`` (haystack first, then
    needles) and the lookup is delegated to ``Index.get_indexer``.

    Parameters
    ----------
    needles : values to look up
    haystack : values to search in

    Returns
    -------
    indexer : the result of calling ``get_indexer`` on the haystack
        ``Index`` with the needles ``Index`` as target
    """
    return Index(haystack).get_indexer(Index(needles))
def cartesian_product(X):
    """
    Numpy version of itertools.product or pandas.compat.product.
    Sometimes faster (for large inputs)...

    Parameters
    ----------
    X : sequence of sized iterables
        The factors of the product.  ``X`` is iterated twice, so it must
        not be a one-shot iterator.

    Returns
    -------
    product : list of ndarray
        One array per factor, each as long as the product of all factor
        lengths.  Reading the arrays in lockstep yields the tuples of the
        product, with the last factor varying fastest.  An empty ``X``
        gives ``[]``; if any factor is empty, every returned array is empty.

    Examples
    --------
    >>> cartesian_product([list('ABC'), [1, 2]])
    [array(['A', 'A', 'B', 'B', 'C', 'C'], dtype='<U1'),
    array([1, 2, 1, 2, 1, 2])]
    """
    lenX = np.fromiter((len(x) for x in X), dtype=int)
    if lenX.size == 0:
        # no factors at all: cumprodX[-1] below would raise IndexError
        return []

    # np.cumprod is the canonical name; the np.cumproduct / np.product
    # aliases are deprecated and no longer exist in NumPy 2.0
    cumprodX = np.cumprod(lenX)

    # a[i]: number of times the repeated i-th factor is tiled, i.e. the
    # product of the lengths of all factors that precede it
    a = np.roll(cumprodX, 1)
    a[0] = 1

    # b[i]: number of times each element of the i-th factor is repeated
    if cumprodX[-1] != 0:
        # exact integer division: the total size is a multiple of every
        # partial product, and np.repeat wants integer counts
        b = cumprodX[-1] // cumprodX
    else:
        # an empty factor makes the whole product empty; avoid 0 / 0
        b = np.zeros_like(cumprodX)

    return [np.tile(np.repeat(np.asarray(com._values_from_object(x)), b[i]),
                    a[i])
            for i, x in enumerate(X)]
def _compose2(f, g):
"""Compose 2 callables"""
return lambda *args, **kwargs: f(g(*args, **kwargs))
def compose(*funcs):
    """
    Compose 2 or more callables.

    The callables are folded from left to right with ``_compose2``, so
    ``compose(f, g, h)(x)`` evaluates as ``f(g(h(x)))``.
    """
    n_funcs = len(funcs)
    assert n_funcs > 1, 'At least 2 callables must be passed to compose'
    composed = reduce(_compose2, funcs)
    return composed
def to_numeric(arg, errors='raise'):
    """
    Convert argument to a numeric type.

    Parameters
    ----------
    arg : scalar, list, tuple, 1-d array, Index, or Series
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input.  Series if Series, Index if Index,
        scalar if scalar, otherwise ndarray

    Raises
    ------
    ValueError
        If ``errors`` is not one of the documented options.
    TypeError
        If ``arg`` is an array-like with more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> import pandas as pd
    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    >>> pd.to_numeric(s, errors='coerce')
    """
    if errors not in ('ignore', 'raise', 'coerce'):
        # an unrecognised option used to fall through and silently behave
        # like 'coerce'; reject it up front instead
        raise ValueError('invalid error value specified')

    is_series = False
    is_index = False
    is_scalar = False

    if isinstance(arg, pd.Series):
        is_series = True
        values = arg.values
    elif isinstance(arg, pd.Index):
        is_index = True
        # use the index's asi8 values when it provides them, otherwise
        # fall back to its regular values
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype='O')
    elif np.isscalar(arg):
        if com.is_number(arg):
            # already numeric: nothing to convert
            return arg
        is_scalar = True
        # wrap in a 1-element array so the array path below can be reused
        values = np.array([arg], dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    else:
        values = arg

    if com.is_numeric_dtype(values):
        pass
    elif com.is_datetime_or_timedelta_dtype(values):
        values = values.astype(np.int64)
    else:
        values = com._ensure_object(values)
        coerce_numeric = errors == 'coerce'

        try:
            values = lib.maybe_convert_numeric(values, set(),
                                               coerce_numeric=coerce_numeric)
        except Exception:
            # 'ignore' and 'coerce' keep whatever ``values`` currently holds.
            # Catch Exception rather than everything so KeyboardInterrupt
            # and SystemExit are never swallowed.
            if errors == 'raise':
                raise

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return Index(values, name=arg.name)
    elif is_scalar:
        return values[0]
    else:
        return values