Skip to content

Commit dcaf64b

Browse files
author
Mateusz Górski
committed
Change approach to pandas-dev#28773 fix
In the new solution, the existing machinery is used to apply the function column-wise and to recreate the final result.
1 parent 7b6e793 commit dcaf64b

File tree

2 files changed

+24
-60
lines changed

2 files changed

+24
-60
lines changed

pandas/core/apply.py

+14-3
Original file line numberDiff line numberDiff line change
@@ -274,13 +274,16 @@ def apply_standard(self):
274274

275275
# we cannot reduce using non-numpy dtypes,
276276
# as demonstrated in gh-12244
277-
if (
277+
flag = (
278278
self.result_type in ["reduce", None]
279279
and not self.dtypes.apply(is_extension_array_dtype).any()
280280
# Disallow complex_internals since libreduction shortcut
281281
# cannot handle MultiIndex
282282
and not isinstance(self.agg_axis, ABCMultiIndex)
283-
):
283+
)
284+
return_result = None
285+
286+
if flag:
284287

285288
values = self.values
286289
index = self.obj._get_axis(self.axis)
@@ -308,11 +311,19 @@ def apply_standard(self):
308311
# reached via numexpr; fall back to python implementation
309312
pass
310313
else:
311-
return self.obj._constructor_sliced(result, index=labels)
314+
return_result = self.obj._constructor_sliced(result, index=labels)
315+
if self.axis != 0 and self.axis != "index":
316+
return return_result
312317

313318
# compute the result using the series generator
314319
results, res_index = self.apply_series_generator()
315320

321+
if flag and return_result is not None:
322+
results = np.array([v for v in results.values()])
323+
return self.obj._constructor_sliced(
324+
results, index=res_index, dtype=return_result.dtype
325+
)
326+
316327
# wrap results
317328
return self.wrap_results(results, res_index)
318329

pandas/core/frame.py

+10-57
Original file line numberDiff line numberDiff line change
@@ -6564,63 +6564,16 @@ def apply(self, func, axis=0, raw=False, result_type=None, args=(), **kwds):
65646564
"""
65656565
from pandas.core.apply import frame_apply
65666566

6567-
# Old apply function, which will be used for each part of DataFrame
6568-
def partial_apply(dataframe):
6569-
op = frame_apply(
6570-
dataframe,
6571-
func=func,
6572-
axis=axis,
6573-
raw=raw,
6574-
result_type=result_type,
6575-
args=args,
6576-
kwds=kwds,
6577-
)
6578-
return op.get_result()
6579-
6580-
def get_dtype(dataframe, column):
6581-
return dataframe.dtypes.values[column]
6582-
6583-
if axis == 0 or axis == "index":
6584-
if self.shape[1] == 0:
6585-
return partial_apply(self)
6586-
6587-
frame = self.iloc[:, [0]]
6588-
result = partial_apply(frame)
6589-
if isinstance(result, Series):
6590-
results = result.values
6591-
else:
6592-
results = result
6593-
6594-
i = 1
6595-
while i < self.shape[1]:
6596-
type = get_dtype(self, i)
6597-
j = i + 1
6598-
6599-
# While the dtype of column is the same as previous ones,
6600-
# they are handled together
6601-
while j < self.shape[1] and pandas.core.dtypes.common.is_dtype_equal(
6602-
type, get_dtype(self, j)
6603-
):
6604-
j += 1
6605-
frame = self.iloc[:, i:j]
6606-
i = j
6607-
result = partial_apply(frame)
6608-
6609-
if isinstance(result, Series):
6610-
results = np.append(results, result.values)
6611-
else:
6612-
for k in range(result.shape[0], results.shape[0]):
6613-
result.loc[k, :] = np.nan
6614-
for k in range(results.shape[0], result.shape[0]):
6615-
results.loc[k, :] = np.nan
6616-
results = pandas.concat([results, result], axis=1)
6617-
6618-
if isinstance(result, Series):
6619-
return Series(results, index=self.columns)
6620-
else:
6621-
return results
6622-
else:
6623-
return partial_apply(self)
6567+
op = frame_apply(
6568+
self,
6569+
func=func,
6570+
axis=axis,
6571+
raw=raw,
6572+
result_type=result_type,
6573+
args=args,
6574+
kwds=kwds,
6575+
)
6576+
return op.get_result()
66246577

66256578
def applymap(self, func):
66266579
"""

0 commit comments

Comments
 (0)