Skip to content

Commit 76b35c6

Browse files
authored
CLN: factor apply out of frame.py (#18754)
1 parent 34ef9eb commit 76b35c6

File tree

4 files changed

+326
-199
lines changed

4 files changed

+326
-199
lines changed

pandas/core/apply.py

+301
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
import numpy as np
2+
from pandas import compat
3+
from pandas._libs import lib
4+
from pandas.core.dtypes.common import (
5+
is_extension_type,
6+
is_sequence)
7+
8+
from pandas.io.formats.printing import pprint_thing
9+
10+
11+
def frame_apply(obj, func, axis=0, broadcast=False,
                raw=False, reduce=None, args=(), **kwds):
    """
    Construct and return a row- or column-based frame apply object.

    Parameters
    ----------
    obj : object
        Object to apply over; must provide ``_get_axis_number``.
    func : object
        Function (or other spec) handed to the apply object unchanged.
    axis : optional, default 0
        Axis identifier, normalized through ``obj._get_axis_number``.
    broadcast, raw, reduce, args
        Forwarded as-is to the apply class constructor.
    **kwds
        Extra keyword arguments, forwarded as a single ``kwds`` dict.

    Returns
    -------
    FrameRowApply or FrameColumnApply
        ``FrameRowApply`` when the axis number is 0, ``FrameColumnApply``
        when it is 1.
    """
    axis_number = obj._get_axis_number(axis)

    # pick the implementation that matches the normalized axis number
    if axis_number == 0:
        applier = FrameRowApply
    elif axis_number == 1:
        applier = FrameColumnApply

    options = dict(broadcast=broadcast, raw=raw, reduce=reduce,
                   args=args, kwds=kwds)
    return applier(obj, func, **options)
23+
24+
25+
class FrameApply(object):
    """
    Shared machinery for applying a function along one axis of a frame.

    This class reads ``self.axis``, ``self.apply_broadcast``,
    ``self.series_generator``, ``self.result_index`` and
    ``self.result_columns`` but does not define them itself; concrete
    subclasses must supply them.
    """

    def __init__(self, obj, func, broadcast, raw, reduce, args, kwds):
        """
        Store the apply options and prepare the callable to run.

        Parameters
        ----------
        obj : object
            The frame-like object being applied over.
        func : callable, str, list or dict
            Function to apply, or a non-callable spec that is dispatched
            later (a string in ``get_result``).
        broadcast, raw, reduce
            Option flags consulted by ``get_result`` and the ``apply_*``
            methods.
        args : tuple
            Extra positional arguments for ``func``.
        kwds : dict
            Extra keyword arguments for ``func``.  The key
            ``'ignore_failures'`` is popped from this dict (the dict is
            mutated in place) and stored as ``self.ignore_failures``.
        """
        self.obj = obj
        self.broadcast = broadcast
        self.raw = raw
        self.reduce = reduce
        self.args = args

        self.ignore_failures = kwds.pop('ignore_failures', False)
        self.kwds = kwds

        # curry if needed
        # Extra keywords always need binding; extra positionals need it
        # unless func is a ufunc.
        wants_curry = kwds or (args and not isinstance(func, np.ufunc))

        # Only callables can be curried.  Strings / lists / dicts must be
        # left untouched, otherwise the isinstance-based dispatch on
        # ``self.f`` never fires and the wrapper would later fail with
        # "object is not callable".
        if wants_curry and callable(func):
            def f(x):
                return func(x, *args, **kwds)
        else:
            f = func

        self.f = f

    @property
    def columns(self):
        """ columns of the wrapped object """
        return self.obj.columns

    @property
    def index(self):
        """ index of the wrapped object """
        return self.obj.index

    @property
    def values(self):
        """ ``values`` of the wrapped object """
        return self.obj.values

    @property
    def agg_axis(self):
        """ labels returned by ``obj._get_agg_axis`` for ``self.axis`` """
        return self.obj._get_agg_axis(self.axis)

    def get_result(self):
        """
        Compute the result, choosing a strategy in this order: empty
        object, string dispatch, ufunc, broadcast, one empty axis, raw,
        and finally the standard per-series path.
        """

        # all empty
        if len(self.columns) == 0 and len(self.index) == 0:
            return self.apply_empty_result()

        # string dispatch: call the method of that name on the object
        if isinstance(self.f, compat.string_types):
            # axis is only forwarded when it is non-zero
            if self.axis:
                self.kwds['axis'] = self.axis
            return getattr(self.obj, self.f)(*self.args, **self.kwds)

        # ufunc: apply to the whole values array at once
        elif isinstance(self.f, np.ufunc):
            with np.errstate(all='ignore'):
                results = self.f(self.values)
            return self.obj._constructor(data=results, index=self.index,
                                         columns=self.columns, copy=False)

        # broadcasting
        if self.broadcast:
            return self.apply_broadcast()

        # one axis empty
        if not all(self.obj.shape):
            return self.apply_empty_result()

        # raw
        if self.raw and not self.obj._is_mixed_type:
            return self.apply_raw()

        return self.apply_standard()

    def apply_empty_result(self):
        """
        Build the result for an object with at least one empty axis.

        When ``reduce`` was not given, the function is probed with an
        empty Series: a non-Series return value is taken to mean the
        function reduces.  Returns a NaN-filled Series over ``agg_axis``
        when reducing, otherwise a copy of the object.
        """
        from pandas import Series
        reduce = self.reduce

        if reduce is None:
            reduce = False

            EMPTY_SERIES = Series([])
            try:
                # NOTE(review): when self.f is the curried wrapper it takes
                # a single argument, so passing args/kwds again raises and
                # the probe is silently skipped -- confirm whether intended.
                r = self.f(EMPTY_SERIES, *self.args, **self.kwds)
                reduce = not isinstance(r, Series)
            except Exception:
                # best-effort probe: any failure leaves reduce as False
                pass

        if reduce:
            return Series(np.nan, index=self.agg_axis)
        else:
            return self.obj.copy()

    def apply_raw(self):
        """
        Apply the function to the raw values along ``self.axis``.

        Tries ``lib.reduce`` first and falls back to
        ``np.apply_along_axis`` on any exception.  A 2-dimensional result
        is returned as a DataFrame, anything else as a Series indexed by
        ``agg_axis``.
        """
        try:
            result = lib.reduce(self.values, self.f, axis=self.axis)
        except Exception:
            result = np.apply_along_axis(self.f, self.axis, self.values)

        # TODO: mixed type case
        from pandas import DataFrame, Series
        if result.ndim == 2:
            return DataFrame(result, index=self.index, columns=self.columns)
        else:
            return Series(result, index=self.agg_axis)

    def apply_standard(self):
        """
        Apply the function, attempting a fast reduction first.

        ``reduce`` defaults to True when it was not given.  If the fast
        ``lib.reduce`` path is skipped or raises, the function is applied
        to each generated Series and the results are wrapped.
        """
        from pandas import Series

        reduce = self.reduce
        if reduce is None:
            reduce = True

        # try to reduce first (by default)
        # this only matters if the reduction in values is of different dtype
        # e.g. if we want to apply to a SparseFrame, then can't directly reduce
        if reduce:
            values = self.values

            # we cannot reduce using non-numpy dtypes,
            # as demonstrated in gh-12244
            if not is_extension_type(values):

                # Create a dummy Series from an empty array
                index = self.obj._get_axis(self.axis)
                empty_arr = np.empty(len(index), dtype=values.dtype)

                dummy = Series(empty_arr, index=index, dtype=values.dtype)

                try:
                    labels = self.agg_axis
                    result = lib.reduce(values, self.f,
                                        axis=self.axis,
                                        dummy=dummy,
                                        labels=labels)
                    return Series(result, index=labels)
                except Exception:
                    # fast path failed; fall through to the generic path
                    pass

        # compute the result using the series generator
        results, res_index, res_columns = self._apply_series_generator()

        # wrap results
        return self.wrap_results(results, res_index, res_columns)

    def _apply_series_generator(self):
        """
        Call the function on every Series from ``series_generator``.

        Returns
        -------
        tuple
            ``(results, res_index, res_columns)`` where ``results`` maps
            the position of each Series to the function's return value.

        Raises
        ------
        Exception
            Without ``ignore_failures``, whatever the function raises is
            re-raised with ``'occurred at index <label>'`` appended to
            its ``args``.
        """
        series_gen = self.series_generator
        res_index = self.result_index
        res_columns = self.result_columns

        i = None
        results = {}
        if self.ignore_failures:
            # NOTE(review): positions that fail are simply missing from
            # ``results``; wrap_results indexes results[0], so a failure
            # at position 0 looks problematic -- confirm.
            successes = []
            for i, v in enumerate(series_gen):
                try:
                    results[i] = self.f(v)
                except Exception:
                    # failures are deliberately dropped in this mode
                    pass
                else:
                    successes.append(i)

            # so will work with MultiIndex
            if len(successes) < len(res_index):
                res_index = res_index.take(successes)

        else:
            try:
                for i, v in enumerate(series_gen):
                    results[i] = self.f(v)
            except Exception as e:
                if hasattr(e, 'args'):

                    # make sure i is defined
                    if i is not None:
                        k = res_index[i]
                        e.args = e.args + ('occurred at index %s' %
                                           pprint_thing(k), )
                raise

        return results, res_index, res_columns

    def wrap_results(self, results, res_index, res_columns):
        """
        Turn the per-series results into the final object.

        If the first result is a sequence, the results are assembled with
        ``obj._constructor`` (transposed when ``self.axis == 1``) and run
        through ``_convert``; otherwise they become a Series indexed by
        ``res_index``.
        """
        from pandas import Series

        if len(results) > 0 and is_sequence(results[0]):
            # Series results carry their own index; other sequences get
            # the result columns as index
            if not isinstance(results[0], Series):
                index = res_columns
            else:
                index = None

            result = self.obj._constructor(data=results, index=index)
            result.columns = res_index

            if self.axis == 1:
                result = result.T
            result = result._convert(
                datetime=True, timedelta=True, copy=False)

        else:

            result = Series(results)
            result.index = res_index

        return result

    def _apply_broadcast(self, target):
        """
        Apply the function to each column of ``target``, writing every
        result into the matching column of an array shaped like
        ``target.values``, and wrap that array with ``obj._constructor``.
        """
        result_values = np.empty_like(target.values)
        columns = target.columns
        for i, col in enumerate(columns):
            result_values[:, i] = self.f(target[col])

        result = self.obj._constructor(result_values, index=target.index,
                                       columns=target.columns)
        return result
239+
240+
241+
class FrameRowApply(FrameApply):
    """
    Apply helper for axis 0.

    The function is called once per column position, on
    ``obj._ixs(i, axis=1)`` (presumably the i-th column as a Series),
    and results are labelled by the object's columns.
    """
    axis = 0

    def get_result(self):
        """
        Compute the result, sending list / dict specs to
        ``obj.aggregate`` and everything else to the base implementation.
        """

        # dispatch to agg
        if isinstance(self.f, (list, dict)):
            # Pass the axis positionally: with ``axis=...`` followed by
            # ``*self.args`` the first extra positional argument would
            # also bind to aggregate's second parameter and raise
            # "multiple values for argument".  Assumes the documented
            # ``aggregate(func, axis, *args, **kwargs)`` signature.
            return self.obj.aggregate(self.f, self.axis,
                                      *self.args, **self.kwds)

        return super(FrameRowApply, self).get_result()

    def apply_broadcast(self):
        """ broadcast the function over the object as-is """
        return self._apply_broadcast(self.obj)

    @property
    def series_generator(self):
        """ lazily yield ``obj._ixs(i, axis=1)`` per column position """
        return (self.obj._ixs(i, axis=1)
                for i in range(len(self.columns)))

    @property
    def result_index(self):
        """ labels for the results: the object's columns """
        return self.columns

    @property
    def result_columns(self):
        """ labels for the other axis: the object's index """
        return self.index
268+
269+
270+
class FrameColumnApply(FrameApply):
    """
    Apply helper for axis 1.

    The function is called on one Series per entry of ``obj.values``,
    indexed by the object's columns and named after the matching index
    label; results are labelled by the object's index.
    """
    axis = 1

    def __init__(self, obj, func, broadcast, raw, reduce, args, kwds):
        """
        Initialize like the base class, then switch ``reduce`` off for
        objects that report mixed datelike dtypes.
        """
        super(FrameColumnApply, self).__init__(obj, func, broadcast,
                                               raw, reduce, args, kwds)

        # GH6125: do not attempt to reduce across axes when the object is
        # both mixed-type and datelike-mixed-type
        if (self.reduce and
                self.obj._is_mixed_type and
                self.obj._is_datelike_mixed_type):
            self.reduce = False

    def apply_broadcast(self):
        """ broadcast over the transposed object and transpose back """
        transposed = self.obj.T
        return self._apply_broadcast(transposed).T

    @property
    def series_generator(self):
        """ lazily yield one Series per entry of ``self.values`` """
        from pandas import Series

        # mixed-type objects get object dtype; otherwise no dtype is given
        if self.obj._is_mixed_type:
            series_dtype = object
        else:
            series_dtype = None

        pairs = zip(self.values, self.index)
        return (Series._from_array(arr, index=self.columns, name=label,
                                   dtype=series_dtype)
                for arr, label in pairs)

    @property
    def result_index(self):
        """ labels for the results: the object's index """
        return self.index

    @property
    def result_columns(self):
        """ labels for the other axis: the object's columns """
        return self.columns

0 commit comments

Comments
 (0)