Skip to content

BUG: closes bug in apply when function returns categorical #10354

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 15, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,4 @@ Performance Improvements

Bug Fixes
~~~~~~~~~
- Bug in ``DataFrame.apply`` when the applied function returns a categorical ``Series``. (:issue:`9573`)
3 changes: 3 additions & 0 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1670,6 +1670,9 @@ def is_view(self):
# Return a view of the dense form of this block's values: calls
# ``to_dense()`` on ``self.values`` and then ``.view()`` on the result.
def to_dense(self):
return self.values.to_dense().view()

# Return a one-element list holding this block: a copy (``self.copy()``)
# when ``copy`` is truthy, otherwise ``self`` itself.
# Extra keyword arguments are accepted but not used by this body.
def convert(self, copy=True, **kwargs):
return [self.copy() if copy else self]

# Read-only property: a 2-tuple of (number of manager locations, number of
# values), i.e. ``(len(self.mgr_locs), len(self.values))``.
@property
def shape(self):
return (len(self.mgr_locs), len(self.values))
Expand Down
54 changes: 21 additions & 33 deletions pandas/src/reduce.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,18 @@ from distutils.version import LooseVersion

is_numpy_prior_1_6_2 = LooseVersion(np.__version__) < '1.6.2'

# Check the first value produced by the applied function and allocate the
# output array for the remaining results.
#
# obj  -- the value to check
# size -- length of the object array to allocate
# cnt  -- length that a list, or a 1-D ``shape`` attribute, must equal for
#         ``obj`` to be rejected
#
# Raises ValueError('function does not reduce') when ``obj`` is an ndarray,
# a list whose length equals ``cnt``, or any object whose ``shape``
# attribute equals ``(cnt,)``.
# Otherwise returns ``np.empty(size, dtype='O')``; ``obj`` is not stored in
# the array here.
cdef _get_result_array(object obj,
Py_ssize_t size,
Py_ssize_t cnt):

# ``and`` binds tighter than ``or``: the length test applies only to the
# list case, while any ndarray is rejected regardless of its length.
if isinstance(obj, np.ndarray) \
or isinstance(obj, list) and len(obj) == cnt \
or getattr(obj, 'shape', None) == (cnt,):
raise ValueError('function does not reduce')

return np.empty(size, dtype='O')


cdef class Reducer:
'''
Performs generic reduction operation on a C or Fortran-contiguous ndarray
Expand Down Expand Up @@ -124,7 +136,9 @@ cdef class Reducer:
if hasattr(res,'values'):
res = res.values
if i == 0:
result = self._get_result_array(res)
result = _get_result_array(res,
self.nresults,
len(self.dummy))
it = <flatiter> PyArray_IterNew(result)

PyArray_SETITEM(result, PyArray_ITER_DATA(it), res)
Expand All @@ -143,17 +157,6 @@ cdef class Reducer:

return result

# NOTE(review): this looks like the pre-change method that the diff removes;
# the hunk above shows the call site switching to the module-level
# ``_get_result_array(res, self.nresults, len(self.dummy))`` -- confirm.
#
# Rejects ``res`` when it is an ndarray or a list whose length equals
# ``len(self.dummy)``; otherwise allocates an object array of length
# ``self.nresults``, stores ``res`` in slot 0 and returns the array.
def _get_result_array(self, object res):
try:
assert(not isinstance(res, np.ndarray))
assert(not (isinstance(res, list) and len(res) == len(self.dummy)))

result = np.empty(self.nresults, dtype='O')
result[0] = res
# Any exception from the try body (a failed assertion, or an error from the
# allocation or the assignment) is reported as the same ValueError.
except Exception:
raise ValueError('function does not reduce')
return result


cdef class SeriesBinGrouper:
'''
Expand Down Expand Up @@ -257,8 +260,10 @@ cdef class SeriesBinGrouper:
res = self.f(cached_typ)
res = _extract_result(res)
if not initialized:
result = self._get_result_array(res)
initialized = 1
result = _get_result_array(res,
self.ngroups,
len(self.dummy_arr))

util.assign_value_1d(result, i, res)

Expand All @@ -277,16 +282,6 @@ cdef class SeriesBinGrouper:

return result, counts

# NOTE(review): this looks like the pre-change method that the diff removes;
# the hunk above shows the call site switching to the module-level
# ``_get_result_array(res, self.ngroups, len(self.dummy_arr))`` -- confirm.
#
# Rejects ``res`` when it is an ndarray or a list whose length equals
# ``len(self.dummy_arr)``; otherwise returns a new object array of length
# ``self.ngroups``. ``res`` itself is not stored in the array here.
def _get_result_array(self, object res):
try:
assert(not isinstance(res, np.ndarray))
assert(not (isinstance(res, list) and len(res) == len(self.dummy_arr)))

result = np.empty(self.ngroups, dtype='O')
# Any exception from the try body (a failed assertion or an allocation
# error) is reported as the same ValueError.
except Exception:
raise ValueError('function does not reduce')
return result


cdef class SeriesGrouper:
'''
Expand Down Expand Up @@ -388,8 +383,10 @@ cdef class SeriesGrouper:
res = self.f(cached_typ)
res = _extract_result(res)
if not initialized:
result = self._get_result_array(res)
initialized = 1
result = _get_result_array(res,
self.ngroups,
len(self.dummy_arr))

util.assign_value_1d(result, lab, res)
counts[lab] = group_size
Expand All @@ -410,15 +407,6 @@ cdef class SeriesGrouper:

return result, counts

# NOTE(review): this looks like the pre-change method that the diff removes;
# the hunk above shows the call site switching to the module-level
# ``_get_result_array(res, self.ngroups, len(self.dummy_arr))`` -- confirm.
#
# Rejects ``res`` when it is an ndarray or a list whose length equals
# ``len(self.dummy_arr)``; otherwise returns a new object array of length
# ``self.ngroups``. ``res`` itself is not stored in the array here.
def _get_result_array(self, object res):
try:
assert(not isinstance(res, np.ndarray))
assert(not (isinstance(res, list) and len(res) == len(self.dummy_arr)))

result = np.empty(self.ngroups, dtype='O')
# Any exception from the try body (a failed assertion or an allocation
# error) is reported as the same ValueError.
except Exception:
raise ValueError('function does not reduce')
return result

cdef inline _extract_result(object res):
''' extract the result object, it might be a 0-dim ndarray
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -10382,6 +10382,13 @@ def test_apply(self):
[[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=['a', 'a', 'c'])
self.assertRaises(ValueError, df.apply, lambda x: x, 2)

# GH9573
df = DataFrame({'c0':['A','A','B','B'], 'c1':['C','C','D','D']})
df = df.apply(lambda ts: ts.astype('category'))
self.assertEqual(df.shape, (4, 2))
self.assertTrue(isinstance(df['c0'].dtype, com.CategoricalDtype))
self.assertTrue(isinstance(df['c1'].dtype, com.CategoricalDtype))

def test_apply_mixed_datetimelike(self):
# mixed datetimelike
# GH 7778
Expand Down