Skip to content

DataFrame.where raises ValueError when at least one of the columns is categorical #22609

Closed
@DmitriyLeybel

Description

@DmitriyLeybel

pandas: 0.23.4

I experienced the error in a different workflow but was able to narrow it down to a simple scenario.

This works as expected:

Working example

import pandas as pd

iris = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')

# iris.loc[:,'species'] = iris.species.astype('category')

iris.where(iris.sepal_width > 2)

image

However, once one of the columns is converted to a categorical dtype, the same call raises a ValueError.

Broken example

import pandas as pd

iris = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')

iris.loc[:,'species'] = iris.species.astype('category')

iris.where(iris.sepal_width > 2)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-13-aa56ba330a63> in <module>()
      5 iris.loc[:,'species'] = iris.species.astype('category')
      6 
----> 7 iris.where(iris.sepal_width > 2)

C:\Anaconda3\lib\site-packages\pandas\core\generic.py in where(self, cond, other, inplace, axis, level, errors, try_cast, raise_on_error)
   7770         other = com._apply_if_callable(other, self)
   7771         return self._where(cond, other, inplace, axis, level,
-> 7772                            errors=errors, try_cast=try_cast)
   7773 
   7774     @Appender(_shared_docs['where'] % dict(_shared_doc_kwargs, cond="False",

C:\Anaconda3\lib\site-packages\pandas\core\generic.py in _where(self, cond, other, inplace, axis, level, errors, try_cast)
   7630                                         errors=errors,
   7631                                         try_cast=try_cast, axis=block_axis,
-> 7632                                         transpose=self._AXIS_REVERSED)
   7633 
   7634             return self._constructor(new_data).__finalize__(self)

C:\Anaconda3\lib\site-packages\pandas\core\internals.py in where(self, **kwargs)
   3682 
   3683     def where(self, **kwargs):
-> 3684         return self.apply('where', **kwargs)
   3685 
   3686     def eval(self, **kwargs):

C:\Anaconda3\lib\site-packages\pandas\core\internals.py in apply(self, f, axes, filter, do_integrity_check, consolidate, **kwargs)
   3579 
   3580             kwargs['mgr'] = self
-> 3581             applied = getattr(b, f)(**kwargs)
   3582             result_blocks = _extend_blocks(applied, result_blocks)
   3583 

C:\Anaconda3\lib\site-packages\pandas\core\internals.py in where(self, other, cond, align, errors, try_cast, axis, transpose, mgr)
   1536                 result = self._try_cast_result(result)
   1537 
-> 1538             return self.make_block(result)
   1539 
   1540         # might need to separate out blocks

C:\Anaconda3\lib\site-packages\pandas\core\internals.py in make_block(self, values, placement, ndim)
    259             ndim = self.ndim
    260 
--> 261         return make_block(values, placement=placement, ndim=ndim)
    262 
    263     def make_block_scalar(self, values):

C:\Anaconda3\lib\site-packages\pandas\core\internals.py in make_block(values, placement, klass, ndim, dtype, fastpath)
   3203                      placement=placement, dtype=dtype)
   3204 
-> 3205     return klass(values, ndim=ndim, placement=placement)
   3206 
   3207 # TODO: flexible with index=None and/or items=None

C:\Anaconda3\lib\site-packages\pandas\core\internals.py in __init__(self, values, placement, ndim)
   2301 
   2302         super(ObjectBlock, self).__init__(values, ndim=ndim,
-> 2303                                           placement=placement)
   2304 
   2305     @property

C:\Anaconda3\lib\site-packages\pandas\core\internals.py in __init__(self, values, placement, ndim)
    123             raise ValueError(
    124                 'Wrong number of items passed {val}, placement implies '
--> 125                 '{mgr}'.format(val=len(self.values), mgr=len(self.mgr_locs)))
    126 
    127     def _check_ndim(self, values, ndim):

ValueError: Wrong number of items passed 150, placement implies 1

The same error occurs when a callable is passed as the condition instead of a boolean Series.

Metadata

Metadata

Labels

Categorical (Categorical Data Type); Indexing (Related to indexing on series/frames, not to indexes themselves); Needs Tests (Unit test(s) needed to prevent regressions); good first issue

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions