pandas-dev · evanpw · Apr 4, 2015 · jreback · Apr 9, 2015 · evanpw
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -88,6 +88,7 @@ Bug Fixes
 
 - Bug in ``ExcelReader`` when worksheet is empty (:issue:`6403`)
 
+- Bug causing ``DataFrame.where`` to not respect the ``axis`` parameter when the frame has a symmetric shape. (:issue:`9736`)
 
 - Bug in ``Table.select_column`` where name is not preserved (:issue:`10392`)
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -3634,19 +3634,31 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None,
             else:
                 other = self._constructor(other, **self._construct_axes_dict())
 
+        if axis is None:
+            axis = 0
+
+        if self.ndim == getattr(other, 'ndim', 0):
+            align = True
+        else:
+            align = (self._get_axis_number(axis) == 1)
+
+        block_axis = self._get_block_manager_axis(axis)
+
         if inplace:
             # we may have different type blocks come out of putmask, so
             # reconstruct the block manager
 
             self._check_inplace_setting(other)
-            new_data = self._data.putmask(mask=cond, new=other, align=axis is None,
-                                          inplace=True)
+            new_data = self._data.putmask(mask=cond, new=other, align=align,
+                                          inplace=True, axis=block_axis,
+                                          transpose=self._AXIS_REVERSED)
             self._update_inplace(new_data)
 
         else:
-            new_data = self._data.where(other=other, cond=cond, align=axis is None,
+            new_data = self._data.where(other=other, cond=cond, align=align,
                                         raise_on_error=raise_on_error,
-                                        try_cast=try_cast)
+                                        try_cast=try_cast, axis=block_axis,
+                                        transpose=self._AXIS_REVERSED)
 
             return self._constructor(new_data).__finalize__(self)
 

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -632,7 +632,8 @@ def _is_empty_indexer(indexer):
 
         return [self]
 
-    def putmask(self, mask, new, align=True, inplace=False):
+    def putmask(self, mask, new, align=True, inplace=False,
+                axis=0, transpose=False):
         """ putmask the data to the block; it is possible that we may create a
         new dtype of block
 
@@ -644,37 +645,55 @@ def putmask(self, mask, new, align=True, inplace=False):
         new : a ndarray/object
         align : boolean, perform alignment on other/cond, default is True
         inplace : perform inplace modification, default is False
+        axis : int
+        transpose : boolean
+            Set to True if self is stored with axes reversed
 
         Returns
         -------
-        a new block(s), the result of the putmask
+        a list of new blocks, the result of the putmask
         """
 
         new_values = self.values if inplace else self.values.copy()
 
-        # may need to align the new
         if hasattr(new, 'reindex_axis'):
-            new = new.values.T
+            new = new.values
 
-        # may need to align the mask
         if hasattr(mask, 'reindex_axis'):
-            mask = mask.values.T
+            mask = mask.values
 
         # if we are passed a scalar None, convert it here
         if not is_list_like(new) and isnull(new) and not self.is_object:
             new = self.fill_value
 
         if self._can_hold_element(new):
+            if transpose:
+                new_values = new_values.T
+
             new = self._try_cast(new)
 
-            # pseudo-broadcast
-            if isinstance(new, np.ndarray) and new.ndim == self.ndim - 1:
-                new = np.repeat(new, self.shape[-1]).reshape(self.shape)
+            # If the default repeat behavior in np.putmask would go in the wrong
+            # direction, then explictly repeat and reshape new instead
+            if getattr(new, 'ndim', 0) >= 1:
+                if self.ndim - 1 == new.ndim and axis == 1:
+                    new = np.repeat(new, new_values.shape[-1]).reshape(self.shape)
 
             np.putmask(new_values, mask, new)
 
         # maybe upcast me
         elif mask.any():
+            if transpose:
+                mask = mask.T
+                if isinstance(new, np.ndarray):
+                    new = new.T
+                axis = new_values.ndim - axis - 1
+
+            # Pseudo-broadcast
+            if getattr(new, 'ndim', 0) >= 1:
+                if self.ndim - 1 == new.ndim:
+                    new_shape = list(new.shape)
+                    new_shape.insert(axis, 1)
+                    new = new.reshape(tuple(new_shape))
 
             # need to go column by column
             new_blocks = []
@@ -685,14 +704,15 @@ def putmask(self, mask, new, align=True, inplace=False):
 
                     # need a new block
                     if m.any():
-
-                        n = new[i] if isinstance(
-                            new, np.ndarray) else np.array(new)
+                        if isinstance(new, np.ndarray):
+                            n = np.squeeze(new[i % new.shape[0]])
+                        else:
+                            n = np.array(new)
 
                         # type of the new block
                         dtype, _ = com._maybe_promote(n.dtype)
 
-                        # we need to exiplicty astype here to make a copy
+                        # we need to explicitly astype here to make a copy
                         n = n.astype(dtype)
 
                         nv = _putmask_smart(v, m, n)
@@ -718,8 +738,10 @@ def putmask(self, mask, new, align=True, inplace=False):
         if inplace:
             return [self]
 
-        return [make_block(new_values,
-                           placement=self.mgr_locs, fastpath=True)]
+        if transpose:
+            new_values = new_values.T
+
+        return [make_block(new_values, placement=self.mgr_locs, fastpath=True)]
 
     def interpolate(self, method='pad', axis=0, index=None,
                     values=None, inplace=False, limit=None,
@@ -1003,7 +1025,7 @@ def handle_error():
                            fastpath=True, placement=self.mgr_locs)]
 
     def where(self, other, cond, align=True, raise_on_error=True,
-              try_cast=False):
+              try_cast=False, axis=0, transpose=False):
         """
         evaluate the block; return result block(s) from the result
 
@@ -1014,50 +1036,33 @@ def where(self, other, cond, align=True, raise_on_error=True,
         align : boolean, perform alignment on other/cond
         raise_on_error : if True, raise when I can't perform the function,
             False by default (and just return the data that we had coming in)
+        axis : int
+        transpose : boolean
+            Set to True if self is stored with axes reversed
 
         Returns
         -------
         a new block(s), the result of the func
         """
 
         values = self.values
+        if transpose:
+            values = values.T
 
-        # see if we can align other
         if hasattr(other, 'reindex_axis'):
             other = other.values
 
-        # make sure that we can broadcast
-        is_transposed = False
-        if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
-            if values.ndim != other.ndim or values.shape == other.shape[::-1]:
-
-                # if its symmetric are ok, no reshaping needed (GH 7506)
-                if (values.shape[0] == np.array(values.shape)).all():
-                    pass
-
-                # pseodo broadcast (its a 2d vs 1d say and where needs it in a
-                # specific direction)
-                elif (other.ndim >= 1 and values.ndim - 1 == other.ndim and
-                        values.shape[0] != other.shape[0]):
-                    other = _block_shape(other).T
-                else:
-                    values = values.T
-                    is_transposed = True
-
-        # see if we can align cond
-        if not hasattr(cond, 'shape'):
-            raise ValueError(
-                "where must have a condition that is ndarray like")
-
         if hasattr(cond, 'reindex_axis'):
             cond = cond.values
 
-        # may need to undo transpose of values
-        if hasattr(values, 'ndim'):
-            if values.ndim != cond.ndim or values.shape == cond.shape[::-1]:
+        # If the default broadcasting would go in the wrong direction, then
+        # explictly reshape other instead
+        if getattr(other, 'ndim', 0) >= 1:
+            if values.ndim - 1 == other.ndim and axis == 1:
+                other = other.reshape(tuple(other.shape + (1,)))
 
-                values = values.T
-                is_transposed = not is_transposed
+        if not hasattr(cond, 'shape'):
+            raise ValueError("where must have a condition that is ndarray like")
 
         other = _maybe_convert_string_to_object(other)
 
@@ -1090,15 +1095,14 @@ def func(c, v, o):
                 raise TypeError('Could not compare [%s] with block values'
                                 % repr(other))
 
-            if is_transposed:
+            if transpose:
                 result = result.T
 
             # try to cast if requested
             if try_cast:
                 result = self._try_cast_result(result)
 
-            return make_block(result,
-                              ndim=self.ndim, placement=self.mgr_locs)
+            return make_block(result, ndim=self.ndim, placement=self.mgr_locs)
 
         # might need to separate out blocks
         axis = cond.ndim - 1
@@ -1733,7 +1737,8 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
 
         return self.make_block_same_class(new_values, new_mgr_locs)
 
-    def putmask(self, mask, new, align=True, inplace=False):
+    def putmask(self, mask, new, align=True, inplace=False,
+                axis=0, transpose=False):
         """ putmask the data to the block; it is possible that we may create a
         new dtype of block
 
@@ -2425,12 +2430,18 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, **kwargs):
             else:
                 kwargs['filter'] = filter_locs
 
-        if f == 'where' and kwargs.get('align', True):
+        if f == 'where':
             align_copy = True
-            align_keys = ['other', 'cond']
-        elif f == 'putmask' and kwargs.get('align', True):
+            if kwargs.get('align', True):
+                align_keys = ['other', 'cond']
+            else:
+                align_keys = ['cond']
+        elif f == 'putmask':
             align_copy = False
-            align_keys = ['new', 'mask']
+            if kwargs.get('align', True):
+                align_keys = ['new', 'mask']
+            else:
+                align_keys = ['mask']
         elif f == 'eval':
             align_copy = False
             align_keys = ['other']

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -10046,6 +10046,110 @@ def test_where_complex(self):
         df[df.abs() >= 5] = np.nan
         assert_frame_equal(df,expected)
 
+    def test_where_axis(self):
+        # GH 9736
+        df = DataFrame(np.random.randn(2, 2))
+        mask = DataFrame([[False, False], [False, False]])
+        s = Series([0, 1])
+
+        expected = DataFrame([[0, 0], [1, 1]], dtype='float64')
+        result = df.where(mask, s, axis='index')
+        assert_frame_equal(result, expected)
+
+        result = df.copy()
+        result.where(mask, s, axis='index', inplace=True)
+        assert_frame_equal(result, expected)
+
+        expected = DataFrame([[0, 1], [0, 1]], dtype='float64')
+        result = df.where(mask, s, axis='columns')
+        assert_frame_equal(result, expected)
+
+        result = df.copy()
+        result.where(mask, s, axis='columns', inplace=True)
+        assert_frame_equal(result, expected)
+
+        # Upcast needed
+        df = DataFrame([[1, 2], [3, 4]], dtype='int64')
+        mask = DataFrame([[False, False], [False, False]])
+        s = Series([0, np.nan])
+
+        expected = DataFrame([[0, 0], [np.nan, np.nan]], dtype='float64')
+        result = df.where(mask, s, axis='index')
+        assert_frame_equal(result, expected)
+
+        result = df.copy()
+        result.where(mask, s, axis='index', inplace=True)
+        assert_frame_equal(result, expected)
+
+        expected = DataFrame([[0, np.nan], [0, np.nan]], dtype='float64')
+        result = df.where(mask, s, axis='columns')
+        assert_frame_equal(result, expected)
+
+        expected = DataFrame({0 : np.array([0, 0], dtype='int64'),
+                              1 : np.array([np.nan, np.nan], dtype='float64')})
+        result = df.copy()
+        result.where(mask, s, axis='columns', inplace=True)
+        assert_frame_equal(result, expected)
+
+        # Multiple dtypes (=> multiple Blocks)
+        df = pd.concat([DataFrame(np.random.randn(10, 2)),
+                     DataFrame(np.random.randint(0, 10, size=(10, 2)))],
+                     ignore_index=True, axis=1)
+        mask = DataFrame(False, columns=df.columns, index=df.index)
+        s1 = Series(1, index=df.columns)
+        s2 = Series(2, index=df.index)
+
+        result = df.where(mask, s1, axis='columns')
+        expected = DataFrame(1.0, columns=df.columns, index=df.index)
+        expected[2] = expected[2].astype(int)
+        expected[3] = expected[3].astype(int)
+        assert_frame_equal(result, expected)
+
+        result = df.copy()
+        result.where(mask, s1, axis='columns', inplace=True)
+        assert_frame_equal(result, expected)
+
+        result = df.where(mask, s2, axis='index')
+        expected = DataFrame(2.0, columns=df.columns, index=df.index)
+        expected[2] = expected[2].astype(int)
+        expected[3] = expected[3].astype(int)
+        assert_frame_equal(result, expected)
+
+        result = df.copy()
+        result.where(mask, s2, axis='index', inplace=True)
+        assert_frame_equal(result, expected)
+
+        # DataFrame vs DataFrame
+        d1 = df.copy().drop(1, axis=0)
+        expected = df.copy()
+        expected.loc[1, :] = np.nan
+
+        result = df.where(mask, d1)
+        assert_frame_equal(result, expected)
+        result = df.where(mask, d1, axis='index')
+        assert_frame_equal(result, expected)
+        result = df.copy()
+        result.where(mask, d1, inplace=True)
+        assert_frame_equal(result, expected)
+        result = df.copy()
+        result.where(mask, d1, inplace=True, axis='index')
+        assert_frame_equal(result, expected)
+
+        d2 = df.copy().drop(1, axis=1)
+        expected = df.copy()
+        expected.loc[:, 1] = np.nan
+
+        result = df.where(mask, d2)
+        assert_frame_equal(result, expected)
+        result = df.where(mask, d2, axis='columns')
+        assert_frame_equal(result, expected)
+        result = df.copy()
+        result.where(mask, d2, inplace=True)
+        assert_frame_equal(result, expected)
+        result = df.copy()
+        result.where(mask, d2, inplace=True, axis='columns')
+        assert_frame_equal(result, expected)
+
     def test_mask(self):
         df = DataFrame(np.random.randn(5, 3))
         cond = df > 0
Original file line number	Diff line number	Diff line change
Expand Up		@@ -88,6 +88,7 @@ Bug Fixes

		- Bug in ``ExcelReader`` when worksheet is empty (:issue:`6403`)

		- Bug causing ``DataFrame.where`` to not respect the ``axis`` parameter when the frame has a symmetric shape. (:issue:`9736`)

		- Bug in ``Table.select_column`` where name is not preserved (:issue:`10392`)

Expand Down