From 47c0695ba3025d1538f964ba6e85868560964aa7 Mon Sep 17 00:00:00 2001
From: behzad nouri <behzadnouri@gmail.com>
Date: Sun, 14 Jun 2015 18:43:23 -0400
Subject: [PATCH] BUG: fix DataFrame.apply when function returns a categorical Series

---
 doc/source/whatsnew/v0.17.0.txt |  1 +
 pandas/core/internals.py        |  3 ++
 pandas/src/reduce.pyx           | 54 +++++++++++++--------------------
 pandas/tests/test_frame.py      |  7 +++++
 4 files changed, 32 insertions(+), 33 deletions(-)
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
index 164ab73def894..4a513f3122390 100644
--- a/doc/source/whatsnew/v0.17.0.txt
+++ b/doc/source/whatsnew/v0.17.0.txt
@@ -58,3 +58,4 @@ Performance Improvements
 
 Bug Fixes
 ~~~~~~~~~
+- Bug in ``DataFrame.apply`` when the function returns a categorical ``Series`` (:issue:`9573`)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 7d83e45098ae1..4c4d940f8077c 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -1670,6 +1670,9 @@ def is_view(self):
     def to_dense(self):
         return self.values.to_dense().view()
 
+    def convert(self, copy=True, **kwargs):  # no conversion; kwargs ignored
+        return [self.copy() if copy else self]
+
     @property
     def shape(self):
         return (len(self.mgr_locs), len(self.values))
diff --git a/pandas/src/reduce.pyx b/pandas/src/reduce.pyx
index add9a03642bed..09f8e0ab42924 100644
--- a/pandas/src/reduce.pyx
+++ b/pandas/src/reduce.pyx
@@ -6,6 +6,18 @@ from distutils.version import LooseVersion
 
 is_numpy_prior_1_6_2 = LooseVersion(np.__version__) < '1.6.2'
 
+cdef _get_result_array(object obj,
+                       Py_ssize_t size,
+                       Py_ssize_t cnt):
+    # any ndarray, a list of length cnt, or shape (cnt,) is not a reduction
+    if isinstance(obj, np.ndarray) \
+            or isinstance(obj, list) and len(obj) == cnt \
+            or getattr(obj, 'shape', None) == (cnt,):
+        raise ValueError('function does not reduce')
+
+    return np.empty(size, dtype='O')
+
+
 cdef class Reducer:
     '''
     Performs generic reduction operation on a C or Fortran-contiguous ndarray
@@ -124,7 +136,9 @@ cdef class Reducer:
                 if hasattr(res,'values'):
                     res = res.values
                 if i == 0:
-                    result = self._get_result_array(res)
+                    result = _get_result_array(res,
+                                               self.nresults,
+                                               len(self.dummy))
                     it = <flatiter> PyArray_IterNew(result)
 
                 PyArray_SETITEM(result, PyArray_ITER_DATA(it), res)
@@ -143,17 +157,6 @@ cdef class Reducer:
 
         return result
 
-    def _get_result_array(self, object res):
-        try:
-            assert(not isinstance(res, np.ndarray))
-            assert(not (isinstance(res, list) and len(res) == len(self.dummy)))
-
-            result = np.empty(self.nresults, dtype='O')
-            result[0] = res
-        except Exception:
-            raise ValueError('function does not reduce')
-        return result
-
 
 cdef class SeriesBinGrouper:
     '''
@@ -257,8 +260,10 @@ cdef class SeriesBinGrouper:
                 res = self.f(cached_typ)
                 res = _extract_result(res)
                 if not initialized:
-                    result = self._get_result_array(res)
                     initialized = 1
+                    result = _get_result_array(res,
+                                               self.ngroups,
+                                               len(self.dummy_arr))
 
                 util.assign_value_1d(result, i, res)
 
@@ -277,16 +282,6 @@ cdef class SeriesBinGrouper:
 
         return result, counts
 
-    def _get_result_array(self, object res):
-        try:
-            assert(not isinstance(res, np.ndarray))
-            assert(not (isinstance(res, list) and len(res) == len(self.dummy_arr)))
-
-            result = np.empty(self.ngroups, dtype='O')
-        except Exception:
-            raise ValueError('function does not reduce')
-        return result
-
 
 cdef class SeriesGrouper:
     '''
@@ -388,8 +383,10 @@ cdef class SeriesGrouper:
                     res = self.f(cached_typ)
                     res = _extract_result(res)
                     if not initialized:
-                        result = self._get_result_array(res)
                         initialized = 1
+                        result = _get_result_array(res,
+                                                   self.ngroups,
+                                                   len(self.dummy_arr))
 
                     util.assign_value_1d(result, lab, res)
                     counts[lab] = group_size
@@ -410,15 +407,6 @@ cdef class SeriesGrouper:
 
         return result, counts
 
-    def _get_result_array(self, object res):
-        try:
-            assert(not isinstance(res, np.ndarray))
-            assert(not (isinstance(res, list) and len(res) == len(self.dummy_arr)))
-
-            result = np.empty(self.ngroups, dtype='O')
-        except Exception:
-            raise ValueError('function does not reduce')
-        return result
 
 cdef inline _extract_result(object res):
     ''' extract the result object, it might be a 0-dim ndarray
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 4b1954a3be64e..a4abe481cfe81 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -10382,6 +10382,13 @@ def test_apply(self):
             [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c'])
         self.assertRaises(ValueError, df.apply, lambda x: x, 2)
 
+        # GH9573
+        df = DataFrame({'c0':['A','A','B','B'], 'c1':['C','C','D','D']})
+        df = df.apply(lambda ts: ts.astype('category'))
+        self.assertEqual(df.shape, (4, 2))
+        self.assertTrue(isinstance(df['c0'].dtype, com.CategoricalDtype))
+        self.assertTrue(isinstance(df['c1'].dtype, com.CategoricalDtype))
+
     def test_apply_mixed_datetimelike(self):
         # mixed datetimelike
         # GH 7778