BUG-23744 DataFrame.apply keeps dtype sparseness

JustinZhengBC · JustinZhengBC · commit b85bdb95fd55 · 2018-11-17T10:55:54.000-08:00
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1259,6 +1259,7 @@ Numeric
 - Bug in :meth:`Series.rpow` with object dtype ``NaN`` for ``1 ** NA`` instead of ``1`` (:issue:`22922`).
 - :meth:`Series.agg` can now handle numpy NaN-aware methods like :func:`numpy.nansum` (:issue:`19629`)
 - Bug in :meth:`Series.rank` and :meth:`DataFrame.rank` when ``pct=True`` and more than 2:sup:`24` rows are present resulted in percentages greater than 1.0 (:issue:`18271`)
+- Bug in :meth:`DataFrame.apply` where columns with a sparse dtype lost their sparseness when a NumPy ufunc was applied (:issue:`23744`)
 
 Strings
 ^^^^^^^
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -7,7 +7,8 @@
     is_extension_type,
     is_dict_like,
     is_list_like,
-    is_sequence)
+    is_sequence,
+    is_sparse)
 from pandas.util._decorators import cache_readonly
 
 from pandas.io.formats.printing import pprint_thing
@@ -133,8 +134,14 @@ def get_result(self):
         elif isinstance(self.f, np.ufunc):
             with np.errstate(all='ignore'):
                 results = self.f(self.values)
-            return self.obj._constructor(data=results, index=self.index,
-                                         columns=self.columns, copy=False)
+            result = self.obj._constructor(data=results, index=self.index,
+                                           columns=self.columns, copy=False)
+            for col in range(self.obj.shape[1]):
+                if is_sparse(self.obj.dtypes.values[col]):
+                    fill = self.f(self.obj.dtypes.values[col].fill_value)
+                    sparse_col = result.iloc[:, col].to_sparse(fill_value=fill)
+                    result.iloc[:, col] = sparse_col
+            return result
 
         # broadcasting
         if self.result_type == 'broadcast':
diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py
@@ -570,6 +570,16 @@ def test_apply_dup_names_multi_agg(self):
 
         tm.assert_frame_equal(result, expected)
 
+    def test_apply_keep_sparse_dtype(self):
+        # GH 23744: apply(np.exp) must agree for SparseDataFrame and DataFrame
+        df = pd.SparseDataFrame(np.array([[0, 1, 0], [0, 0, 0], [0, 0, 1]]),
+                                columns=['a', 'b', 'c'], default_fill_value=1)
+        df2 = pd.DataFrame(df)
+
+        df = df.apply(np.exp)
+        df2 = df2.apply(np.exp)
+        tm.assert_frame_equal(df, df2)
+
 
 class TestInferOutputShape(object):
     # the user has supplied an opaque UDF where