ENH: Fixed DF.apply for functions returning a dict (closes #8735)

ringw · ringw · commit f235902310b7 · 2015-08-27T10:26:42.000-04:00
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -218,6 +218,8 @@ Other enhancements
 
 - Support pickling of ``Period`` objects (:issue:`10439`)
 
+- ``DataFrame.apply`` will return a Series of dicts if the passed function returns a dict and ``reduce=True`` (:issue:`8735`).
+
 .. _whatsnew_0170.api:
 
 .. _whatsnew_0170.api_breaking:
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3922,10 +3922,13 @@ def _apply_standard(self, func, axis, ignore_failures=False, reduce=True):
         if reduce:
 
             try:
-
                 # the is the fast-path
                 values = self.values
-                dummy = Series(NA, index=self._get_axis(axis),
+                # Create a dummy Series backed by an uninitialized (np.empty)
+                # array; unlike filling with NA, this works for any dtype
+                index = self._get_axis(axis)
+                empty_arr = np.empty(len(index), dtype=values.dtype)
+                dummy = Series(empty_arr, index=self._get_axis(axis),
                                dtype=values.dtype)
 
                 labels = self._get_agg_axis(axis)
diff --git a/pandas/src/reduce.pyx b/pandas/src/reduce.pyx
@@ -133,7 +133,7 @@ cdef class Reducer:
                 else:
                     res = self.f(chunk)
 
-                if hasattr(res,'values'):
+                if hasattr(res,'values') and isinstance(res.values, np.ndarray):
                     res = res.values
                 if i == 0:
                     result = _get_result_array(res,
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -11255,6 +11255,25 @@ def test_apply_multi_index(self):
         res = s.apply(lambda x: Series({'min': min(x), 'max': max(x)}), 1)
         tm.assertIsInstance(res.index, MultiIndex)
 
+    def test_apply_dict(self):
+
+        # GH 8735: a dict-returning func reduces to a Series of dicts
+        A = DataFrame([['foo', 'bar'], ['spam', 'eggs']])
+        A_dicts = pd.Series([dict([(0, 'foo'), (1, 'spam')]),
+                             dict([(0, 'bar'), (1, 'eggs')])])
+        B = DataFrame([[0, 1], [2, 3]])
+        B_dicts = pd.Series([dict([(0, 0), (1, 2)]), dict([(0, 1), (1, 3)])])
+        fn = lambda x: x.to_dict()
+
+        for df, dicts in [(A, A_dicts), (B, B_dicts)]:
+            reduce_true = df.apply(fn, reduce=True)
+            reduce_false = df.apply(fn, reduce=False)
+            reduce_none = df.apply(fn, reduce=None)
+
+            assert_series_equal(reduce_true, dicts)
+            assert_frame_equal(reduce_false, df)
+            assert_series_equal(reduce_none, dicts)
+
     def test_applymap(self):
         applied = self.frame.applymap(lambda x: x * 2)
         assert_frame_equal(applied, self.frame * 2)