Skip to content

Commit b18b74f

Browse files
committed
BUG: Allow apply to be used with non-numpy-dtype DataFrames
Fixes a bug in DataFrame.apply by avoiding reducing DataFrames whose values dtype is not a numpy dtype. Closes gh-12244.
1 parent 0c09bd1 commit b18b74f

File tree

3 files changed

+34
-13
lines changed

3 files changed

+34
-13
lines changed

doc/source/whatsnew/v0.18.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -967,4 +967,7 @@ Bug Fixes
967967
- Bug in ``.skew`` and ``.kurt`` due to roundoff error for highly similar values (:issue:`11974`)
968968

969969
- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`)
970+
970971
- Bug in ``crosstab`` where arguments with non-overlapping indexes would return a ``KeyError`` (:issue:`10291`)
972+
973+
- Bug in ``DataFrame.apply`` where reduction was attempted even when the ``dtype`` of the values was not a numpy dtype (:issue:`12244`)

pandas/core/frame.py

+15-13
Original file line numberDiff line numberDiff line change
@@ -4072,22 +4072,24 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True):
40724072
# this only matters if the reduction in values is of different dtype
40734073
# e.g. if we want to apply to a SparseFrame, then can't directly reduce
40744074
if reduce:
4075-
40764075
values = self.values
40774076

4078-
# Create a dummy Series from an empty array
4079-
index = self._get_axis(axis)
4080-
empty_arr = np.empty(len(index), dtype=values.dtype)
4081-
dummy = Series(empty_arr, index=self._get_axis(axis),
4082-
dtype=values.dtype)
4077+
# we cannot reduce using non-numpy dtypes,
4078+
# as demonstrated in gh-12244
4079+
if not is_internal_type(values):
4080+
# Create a dummy Series from an empty array
4081+
index = self._get_axis(axis)
4082+
empty_arr = np.empty(len(index), dtype=values.dtype)
4083+
dummy = Series(empty_arr, index=self._get_axis(axis),
4084+
dtype=values.dtype)
40834085

4084-
try:
4085-
labels = self._get_agg_axis(axis)
4086-
result = lib.reduce(values, func, axis=axis, dummy=dummy,
4087-
labels=labels)
4088-
return Series(result, index=labels)
4089-
except Exception:
4090-
pass
4086+
try:
4087+
labels = self._get_agg_axis(axis)
4088+
result = lib.reduce(values, func, axis=axis, dummy=dummy,
4089+
labels=labels)
4090+
return Series(result, index=labels)
4091+
except Exception:
4092+
pass
40914093

40924094
dtype = object if self._is_mixed_type else None
40934095
if axis == 0:

pandas/tests/frame/test_apply.py

+16
Original file line numberDiff line numberDiff line change
@@ -400,3 +400,19 @@ def test_applymap(self):
400400
result = df.applymap(str)
401401
for f in ['datetime', 'timedelta']:
402402
self.assertEqual(result.loc[0, f], str(df.loc[0, f]))
403+
404+
# See gh-12244
405+
def test_apply_non_numpy_dtype(self):
406+
df = DataFrame({'dt': pd.date_range(
407+
"2015-01-01", periods=3, tz='Europe/Brussels')})
408+
result = df.apply(lambda x: x)
409+
assert_frame_equal(result, df)
410+
411+
result = df.apply(lambda x: x + pd.Timedelta('1day'))
412+
expected = DataFrame({'dt': pd.date_range(
413+
"2015-01-02", periods=3, tz='Europe/Brussels')})
414+
assert_frame_equal(result, expected)
415+
416+
df = DataFrame({'dt': ['a', 'b', 'c', 'a']}, dtype='category')
417+
result = df.apply(lambda x: x)
418+
assert_frame_equal(result, df)

0 commit comments

Comments
 (0)