Skip to content

Commit b85bdb9

Browse files
committed
BUG-23744 DataFrame.apply keeps dtype sparseness
1 parent db2066b commit b85bdb9

File tree

3 files changed

+21
-3
lines changed

3 files changed

+21
-3
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,7 @@ Numeric
12591259
- Bug in :meth:`Series.rpow` with object dtype returning ``NaN`` for ``1 ** NA`` instead of ``1`` (:issue:`22922`).
12601260
- :meth:`Series.agg` can now handle numpy NaN-aware methods like :func:`numpy.nansum` (:issue:`19629`)
12611261
- Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``pct=True`` and more than 2\ :sup:`24` rows are present resulted in percentages greater than 1.0 (:issue:`18271`)
1262+
- Bug in :meth:`DataFrame.apply` where applying a NumPy ufunc did not preserve the sparse dtype of sparse columns (:issue:`23744`)
12621263

12631264
Strings
12641265
^^^^^^^

pandas/core/apply.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
is_extension_type,
88
is_dict_like,
99
is_list_like,
10-
is_sequence)
10+
is_sequence,
11+
is_sparse)
1112
from pandas.util._decorators import cache_readonly
1213

1314
from pandas.io.formats.printing import pprint_thing
@@ -133,8 +134,14 @@ def get_result(self):
133134
elif isinstance(self.f, np.ufunc):
134135
with np.errstate(all='ignore'):
135136
results = self.f(self.values)
136-
return self.obj._constructor(data=results, index=self.index,
137-
columns=self.columns, copy=False)
137+
result = self.obj._constructor(data=results, index=self.index,
138+
columns=self.columns, copy=False)
139+
for col in range(self.obj.shape[1]):
140+
if is_sparse(self.obj.dtypes.values[col]):
141+
fill = self.f(self.obj.dtypes.values[col].fill_value)
142+
sparse_col = result.iloc[:, col].to_sparse(fill_value=fill)
143+
result.iloc[:, col] = sparse_col
144+
return result
138145

139146
# broadcasting
140147
if self.result_type == 'broadcast':

pandas/tests/frame/test_apply.py

+10
Original file line numberDiff line numberDiff line change
@@ -570,6 +570,16 @@ def test_apply_dup_names_multi_agg(self):
570570

571571
tm.assert_frame_equal(result, expected)
572572

573+
def test_apply_keep_sparse_dtype(self):
574+
# GH 23744
575+
df = pd.SparseDataFrame(np.array([[0, 1, 0], [0, 0, 0], [0, 0, 1]]),
576+
columns=['a', 'b', 'c'], default_fill_value=1)
577+
df2 = pd.DataFrame(df)
578+
579+
df = df.apply(np.exp)
580+
df2 = df2.apply(np.exp)
581+
tm.assert_frame_equal(df, df2)
582+
573583

574584
class TestInferOutputShape(object):
575585
# the user has supplied an opaque UDF where

0 commit comments

Comments
 (0)