Merge branch 'master' into test_getmembers_series

jreback · web-flow · commit 1e46304723c0 · 2020-12-29T11:50:01.000-05:00
diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst
@@ -730,7 +730,7 @@ The :meth:`~plt.close` method is used to `close <https://matplotlib.org/3.1.1/ap
    ts = ts.cumsum()
 
    @savefig series_plot_basic.png
-   ts.plot()
+   ts.plot();
 
 On a DataFrame, the :meth:`~DataFrame.plot` method is a convenience to plot all
 of the columns with labels:
@@ -743,10 +743,10 @@ of the columns with labels:
 
    df = df.cumsum()
 
-   plt.figure()
-   df.plot()
+   plt.figure();
+   df.plot();
    @savefig frame_plot_basic.png
-   plt.legend(loc='best')
+   plt.legend(loc='best');
 
 Getting data in/out
 -------------------
diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst
@@ -41,7 +41,8 @@ I/O
 
 Other
 ~~~~~
-- - ``inspect.getmembers(Series)`` no longer raises an ``AbstractMethodError`` (:issue:`38782`).
+- ``inspect.getmembers(Series)`` no longer raises an ``AbstractMethodError`` (:issue:`38782`).
+- Fixed build failure on macOS 11 with Python 3.9.1 (:issue:`38766`).
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -295,7 +295,7 @@ Sparse
 
 ExtensionArray
 ^^^^^^^^^^^^^^
-
+- Bug in :meth:`DataFrame.where` when ``other`` is a :class:`Series` with ExtensionArray dtype (:issue:`38729`)
 -
 -
 
diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
@@ -55,39 +55,11 @@ cdef:
 
     float64_t NaN = <float64_t>np.NaN
 
-cdef inline int int_max(int a, int b): return a if a >= b else b
-cdef inline int int_min(int a, int b): return a if a <= b else b
-
 cdef bint is_monotonic_increasing_start_end_bounds(
     ndarray[int64_t, ndim=1] start, ndarray[int64_t, ndim=1] end
 ):
     return is_monotonic(start, False)[0] and is_monotonic(end, False)[0]
 
-# Cython implementations of rolling sum, mean, variance, skewness,
-# other statistical moment functions
-#
-# Misc implementation notes
-# -------------------------
-#
-# - In Cython x * x is faster than x ** 2 for C types, this should be
-#   periodically revisited to see if it's still true.
-#
-
-# original C implementation by N. Devillard.
-# This code in public domain.
-# Function :   kth_smallest()
-# In       :   array of elements, # of elements in the array, rank k
-# Out      :   one element
-# Job      :   find the kth smallest element in the array
-
-#             Reference:
-
-#               Author: Wirth, Niklaus
-#                Title: Algorithms + data structures = programs
-#            Publisher: Englewood Cliffs: Prentice-Hall, 1976
-# Physical description: 366 p.
-#               Series: Prentice-Hall Series in Automatic Computation
-
 # ----------------------------------------------------------------------
 # Rolling sum
 
@@ -774,7 +746,6 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
 
 def roll_median_c(const float64_t[:] values, ndarray[int64_t] start,
                   ndarray[int64_t] end, int64_t minp):
-    # GH 32865. win argument kept for compatibility
     cdef:
         float64_t val, res, prev
         bint err = False
@@ -1167,9 +1138,8 @@ def roll_apply(object obj,
     arr = np.asarray(obj)
 
     # ndarray input
-    if raw:
-        if not arr.flags.c_contiguous:
-            arr = arr.copy('C')
+    if raw and not arr.flags.c_contiguous:
+        arr = arr.copy('C')
 
     counts = roll_sum(np.isfinite(arr).astype(float), start, end, minp)
 
@@ -1195,17 +1165,17 @@ def roll_apply(object obj,
 # Rolling sum and mean for weighted window
 
 
-def roll_weighted_sum(float64_t[:] values, float64_t[:] weights, int minp):
+def roll_weighted_sum(const float64_t[:] values, const float64_t[:] weights, int minp):
     return _roll_weighted_sum_mean(values, weights, minp, avg=0)
 
 
-def roll_weighted_mean(float64_t[:] values, float64_t[:] weights, int minp):
+def roll_weighted_mean(const float64_t[:] values, const float64_t[:] weights, int minp):
     return _roll_weighted_sum_mean(values, weights, minp, avg=1)
 
 
-cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values,
-                                                float64_t[:] weights,
-                                                int minp, bint avg):
+cdef float64_t[:] _roll_weighted_sum_mean(const float64_t[:] values,
+                                          const float64_t[:] weights,
+                                          int minp, bint avg):
     """
     Assume len(weights) << len(values)
     """
@@ -1270,7 +1240,7 @@ cdef ndarray[float64_t] _roll_weighted_sum_mean(float64_t[:] values,
                 if c < minp:
                     output[in_i] = NaN
 
-    return np.asarray(output)
+    return output
 
 
 # ----------------------------------------------------------------------
@@ -1424,7 +1394,7 @@ cdef inline void remove_weighted_var(float64_t val,
             mean[0] = 0
 
 
-def roll_weighted_var(float64_t[:] values, float64_t[:] weights,
+def roll_weighted_var(const float64_t[:] values, const float64_t[:] weights,
                       int64_t minp, unsigned int ddof):
     """
     Calculates weighted rolling variance using West's online algorithm.
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -89,9 +89,10 @@
 import pandas as pd
 from pandas.core import arraylike, indexing, missing, nanops
 import pandas.core.algorithms as algos
+from pandas.core.arrays import ExtensionArray
 from pandas.core.base import PandasObject, SelectionMixin
 import pandas.core.common as com
-from pandas.core.construction import create_series_with_explicit_dtype
+from pandas.core.construction import create_series_with_explicit_dtype, extract_array
 from pandas.core.flags import Flags
 from pandas.core.indexes import base as ibase
 from pandas.core.indexes.api import (
@@ -8780,6 +8781,9 @@ def _where(
         """
         inplace = validate_bool_kwarg(inplace, "inplace")
 
+        if axis is not None:
+            axis = self._get_axis_number(axis)
+
         # align the cond to same shape as myself
         cond = com.apply_if_callable(cond, self)
         if isinstance(cond, NDFrame):
@@ -8819,22 +8823,39 @@ def _where(
             if other.ndim <= self.ndim:
 
                 _, other = self.align(
-                    other, join="left", axis=axis, level=level, fill_value=np.nan
+                    other,
+                    join="left",
+                    axis=axis,
+                    level=level,
+                    fill_value=np.nan,
+                    copy=False,
                 )
 
                 # if we are NOT aligned, raise as we cannot where index
-                if axis is None and not all(
-                    other._get_axis(i).equals(ax) for i, ax in enumerate(self.axes)
-                ):
+                if axis is None and not other._indexed_same(self):
                     raise InvalidIndexError
 
+                elif other.ndim < self.ndim:
+                    # TODO(EA2D): avoid object-dtype cast in EA case GH#38729
+                    other = other._values
+                    if axis == 0:
+                        other = np.reshape(other, (-1, 1))
+                    elif axis == 1:
+                        other = np.reshape(other, (1, -1))
+
+                    other = np.broadcast_to(other, self.shape)
+
             # slice me out of the other
             else:
                 raise NotImplementedError(
                     "cannot align with a higher dimensional NDFrame"
                 )
 
-        if isinstance(other, np.ndarray):
+        if not isinstance(other, (MultiIndex, NDFrame)):
+            # mainly just catching Index here
+            other = extract_array(other, extract_numpy=True)
+
+        if isinstance(other, (np.ndarray, ExtensionArray)):
 
             if other.shape != self.shape:
 
@@ -8879,10 +8900,10 @@ def _where(
         else:
             align = self._get_axis_number(axis) == 1
 
-        if align and isinstance(other, NDFrame):
-            other = other.reindex(self._info_axis, axis=self._info_axis_number)
         if isinstance(cond, NDFrame):
-            cond = cond.reindex(self._info_axis, axis=self._info_axis_number)
+            cond = cond.reindex(
+                self._info_axis, axis=self._info_axis_number, copy=False
+            )
 
         block_axis = self._get_block_manager_axis(axis)
 
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1064,9 +1064,7 @@ def putmask(self, mask, new, axis: int = 0) -> List["Block"]:
             # If the default repeat behavior in np.putmask would go in the
             # wrong direction, then explicitly repeat and reshape new instead
             if getattr(new, "ndim", 0) >= 1:
-                if self.ndim - 1 == new.ndim and axis == 1:
-                    new = np.repeat(new, new_values.shape[-1]).reshape(self.shape)
-                new = new.astype(new_values.dtype)
+                new = new.astype(new_values.dtype, copy=False)
 
             # we require exact matches between the len of the
             # values we are setting (or is compat). np.putmask
@@ -1104,13 +1102,6 @@ def putmask(self, mask, new, axis: int = 0) -> List["Block"]:
                     new = new.T
                 axis = new_values.ndim - axis - 1
 
-            # Pseudo-broadcast
-            if getattr(new, "ndim", 0) >= 1:
-                if self.ndim - 1 == new.ndim:
-                    new_shape = list(new.shape)
-                    new_shape.insert(axis, 1)
-                    new = new.reshape(tuple(new_shape))
-
             # operate column-by-column
             def f(mask, val, idx):
 
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -174,9 +174,8 @@ def _create_data(self, obj: FrameOrSeries) -> FrameOrSeries:
         Split data into blocks & return conformed data.
         """
         # drop the ``on`` column from the object
-        if self.on is not None and not isinstance(self.on, Index):
-            if obj.ndim == 2:
-                obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False)
+        if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2:
+            obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False)
         if self.axis == 1:
             # GH: 20649 in case of mixed dtype and axis=1 we have to convert everything
             # to float to calculate the complete row at once. We exclude all non-numeric
@@ -238,10 +237,6 @@ def _get_cov_corr_window(
         """
         return self.window
 
-    @property
-    def _window_type(self) -> str:
-        return type(self).__name__
-
     def __repr__(self) -> str:
         """
         Provide a nice str repr of our rolling object.
@@ -252,7 +247,7 @@ def __repr__(self) -> str:
             if getattr(self, attr_name, None) is not None
         )
         attrs = ",".join(attrs_list)
-        return f"{self._window_type} [{attrs}]"
+        return f"{type(self).__name__} [{attrs}]"
 
     def __iter__(self):
         obj = self._create_data(self._selected_obj)
@@ -278,7 +273,7 @@ def _prep_values(self, values: Optional[np.ndarray] = None) -> np.ndarray:
 
         if needs_i8_conversion(values.dtype):
             raise NotImplementedError(
-                f"ops for {self._window_type} for this "
+                f"ops for {type(self).__name__} for this "
                 f"dtype {values.dtype} are not implemented"
             )
         else:
@@ -464,7 +459,6 @@ def calc(x):
                     result = np.apply_along_axis(calc, self.axis, values)
                 else:
                     result = calc(values)
-                    result = np.asarray(result)
 
             if numba_cache_key is not None:
                 NUMBA_FUNC_CACHE[numba_cache_key] = func
@@ -1102,8 +1096,8 @@ def calc(x):
                 if values.ndim > 1:
                     result = np.apply_along_axis(calc, self.axis, values)
                 else:
-                    result = calc(values)
-                    result = np.asarray(result)
+                    # Our weighted aggregations return memoryviews
+                    result = np.asarray(calc(values))
 
             if self.center:
                 result = self._center_window(result, offset)
@@ -2158,7 +2152,7 @@ def _validate_monotonic(self):
         """
         Validate that on is monotonic;
         in this case we have to check only for nans, because
-        monotonicy was already validated at a higher level.
+        monotonicity was already validated at a higher level.
         """
         if self._on.hasnans:
             self._raise_monotonic_error()
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
diff --git a/setup.py b/setup.py

Original file line number	Diff line number	Diff line change
`@@ -41,7 +41,8 @@ I/O`
`41`	`41`
`42`	`42`	`Other`
`43`	`43`	`~~~~~`
`44`		-- - ``inspect.getmembers(Series)`` no longer raises an ``AbstractMethodError`` (:issue:`38782`).
	`44`	+- ``inspect.getmembers(Series)`` no longer raises an ``AbstractMethodError`` (:issue:`38782`).
	`45`	+- Fixed build failure on macOS 11 with Python 3.9.1 (:issue:`38766`).
`45`	`46`	`-`
`46`	`47`
`47`	`48`	`.. ---------------------------------------------------------------------------`
Original file line number	Diff line number	Diff line change
`@@ -295,7 +295,7 @@ Sparse`
`295`	`295`
`296`	`296`	`ExtensionArray`
`297`	`297`	`^^^^^^^^^^^^^^`
`298`		`-`
	`298`	+- Bug in :meth:`DataFrame.where` when ``other`` is a :class:`Series` with ExtensionArray dtype (:issue:`38729`)
`299`	`299`	`-`
`300`	`300`	`-`
`301`	`301`