Sync Fork from Upstream Repo #91

Merged: 13 commits, Mar 14, 2020

14 changes: 4 additions & 10 deletions pandas/_libs/tslib.pyx
@@ -14,7 +14,7 @@ PyDateTime_IMPORT


 cimport numpy as cnp
-from numpy cimport float64_t, int64_t, ndarray
+from numpy cimport float64_t, int64_t, ndarray, uint8_t
 import numpy as np
 cnp.import_array()

@@ -351,7 +351,6 @@ def format_array_from_datetime(

 def array_with_unit_to_datetime(
     ndarray values,
-    ndarray mask,
     object unit,
     str errors='coerce'
 ):
@@ -373,8 +372,6 @@ def array_with_unit_to_datetime(
     ----------
     values : ndarray of object
         Date-like objects to convert.
-    mask : boolean ndarray
-        Not-a-time mask for non-nullable integer types conversion, can be None.
     unit : object
         Time unit to use during conversion.
     errors : str, default 'raise'
@@ -395,6 +392,7 @@ def array_with_unit_to_datetime(
         bint need_to_iterate = True
         ndarray[int64_t] iresult
         ndarray[object] oresult
+        ndarray mask
         object tz = None

     assert is_ignore or is_coerce or is_raise
@@ -404,9 +402,6 @@ def array_with_unit_to_datetime(
             result = values.astype('M8[ns]')
         else:
             result, tz = array_to_datetime(values.astype(object), errors=errors)
-            if mask is not None:
-                iresult = result.view('i8')
-                iresult[mask] = NPY_NAT
         return result, tz

     m = cast_from_unit(None, unit)
@@ -419,9 +414,8 @@ def array_with_unit_to_datetime(
     if values.dtype.kind == "i":
         # Note: this condition makes the casting="same_kind" redundant
         iresult = values.astype('i8', casting='same_kind', copy=False)
-        # If no mask, fill mask by comparing to NPY_NAT constant
-        if mask is None:
-            mask = iresult == NPY_NAT
+        # fill by comparing to NPY_NAT constant
+        mask = iresult == NPY_NAT
         iresult[mask] = 0
         fvalues = iresult.astype('f8') * m
         need_to_iterate = False
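
The net effect in tslib.pyx is that the caller no longer supplies a not-a-time mask; integer input is always masked by comparing against the NPY_NAT sentinel. A minimal sketch of the user-visible behavior through the public pd.to_datetime (values chosen for illustration; assumes a pandas build containing this change):

    import numpy as np
    import pandas as pd

    # NPY_NAT is the minimum int64; integer entries equal to it are treated
    # as missing, mirroring the `mask = iresult == NPY_NAT` line above
    vals = np.array([1, np.iinfo(np.int64).min], dtype="i8")
    print(pd.to_datetime(vals, unit="s"))
    # DatetimeIndex(['1970-01-01 00:00:01', 'NaT'], dtype='datetime64[ns]', freq=None)
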
13 changes: 12 additions & 1 deletion pandas/_libs/tslibs/offsets.pyx
@@ -114,7 +114,18 @@ def apply_index_wraps(func):
     # Note: normally we would use `@functools.wraps(func)`, but this does
     # not play nicely with cython class methods
     def wrapper(self, other):
-        result = func(self, other)
+
+        is_index = getattr(other, "_typ", "") == "datetimeindex"
+
+        # operate on DatetimeArray
+        arr = other._data if is_index else other
+
+        result = func(self, arr)
+
+        if is_index:
+            # Wrap DatetimeArray result back to DatetimeIndex
+            result = type(other)._simple_new(result, name=other.name)
+
         if self.normalize:
             result = result.to_period('D').to_timestamp()
         return result
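
apply_index_wraps now unwraps a DatetimeIndex to its underlying DatetimeArray before calling the wrapped function, then rebuilds the index, so the index name survives the round trip. A quick illustrative check (the offset choice is arbitrary; assumes a pandas version with this wrapper):

    import pandas as pd

    idx = pd.date_range("2020-01-01", periods=3, freq="D", name="ts")
    shifted = idx + pd.offsets.MonthEnd()

    # _simple_new(result, name=other.name) re-attaches the name
    print(type(shifted).__name__, shifted.name)  # DatetimeIndex ts
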
7 changes: 7 additions & 0 deletions pandas/core/arrays/base.py
@@ -407,6 +407,13 @@ def shape(self) -> Tuple[int, ...]:
         """
         return (len(self),)

+    @property
+    def size(self) -> int:
+        """
+        The number of elements in the array.
+        """
+        return np.prod(self.shape)
+
     @property
     def ndim(self) -> int:
         """
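
Because shape is (len(self),) for these one-dimensional arrays, size is just the product of the shape entries. A small sketch with a nullable integer array (any ExtensionArray subclass behaves the same):

    import pandas as pd

    arr = pd.array([1, 2, None], dtype="Int64")
    print(arr.shape)  # (3,)
    print(arr.size)   # 3, i.e. np.prod((3,))
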
3 changes: 3 additions & 0 deletions pandas/core/arrays/integer.py
@@ -13,6 +13,7 @@
 from pandas.core.dtypes.cast import astype_nansafe
 from pandas.core.dtypes.common import (
     is_bool_dtype,
+    is_datetime64_dtype,
     is_float,
     is_float_dtype,
     is_integer,
@@ -469,6 +470,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
         if is_float_dtype(dtype):
             # In astype, we consider dtype=float to also mean na_value=np.nan
             kwargs = dict(na_value=np.nan)
+        elif is_datetime64_dtype(dtype):
+            kwargs = dict(na_value=np.datetime64("NaT"))
         else:
             kwargs = {}

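
With the new datetime64 branch, casting a nullable integer array passes na_value=np.datetime64("NaT") down to the underlying conversion, so missing entries become NaT rather than raising. A hedged example, assuming the integers are epoch nanoseconds:

    import pandas as pd

    arr = pd.array([1_577_836_800_000_000_000, None], dtype="Int64")
    print(arr.astype("datetime64[ns]"))
    # ['2020-01-01T00:00:00.000000000' 'NaT']
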
34 changes: 19 additions & 15 deletions pandas/core/frame.py
@@ -7808,6 +7808,8 @@ def _reduce(
         self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
     ):

+        assert filter_type is None or filter_type == "bool", filter_type
+
         dtype_is_dt = self.dtypes.apply(
             lambda x: is_datetime64_any_dtype(x) or is_period_dtype(x)
         )
@@ -7835,7 +7837,7 @@ def f(x):
             return op(x, axis=axis, skipna=skipna, **kwds)

         def _get_data(axis_matters):
-            if filter_type is None or filter_type == "numeric":
+            if filter_type is None:
                 data = self._get_numeric_data()
             elif filter_type == "bool":
                 if axis_matters:
@@ -7882,15 +7884,11 @@ def blk_func(values):
                 return out

         if numeric_only is None:
-            values = self.values
+            data = self
+            values = data.values
             try:
                 result = f(values)

-                if filter_type == "bool" and is_object_dtype(values) and axis is None:
-                    # work around https://github.com/numpy/numpy/issues/10489
-                    # TODO: combine with hasattr(result, 'dtype') further down
-                    #  hard since we don't have `values` down there.
-                    result = np.bool_(result)
             except TypeError:
                 # e.g. in nanops trying to convert strs to float

@@ -7916,30 +7914,36 @@ def blk_func(values):

                 # TODO: why doesnt axis matter here?
                 data = _get_data(axis_matters=False)
-                with np.errstate(all="ignore"):
-                    result = f(data.values)
                 labels = data._get_agg_axis(axis)

+                values = data.values
+                with np.errstate(all="ignore"):
+                    result = f(values)
         else:
             if numeric_only:
                 data = _get_data(axis_matters=True)
-                labels = data._get_agg_axis(axis)

                 values = data.values
+                labels = data._get_agg_axis(axis)
             else:
-                values = self.values
+                data = self
+                values = data.values
             result = f(values)

-        if hasattr(result, "dtype") and is_object_dtype(result.dtype):
+        if filter_type == "bool" and is_object_dtype(values) and axis is None:
+            # work around https://github.com/numpy/numpy/issues/10489
+            # TODO: can we de-duplicate parts of this with the next blocK?
+            result = np.bool_(result)
+        elif hasattr(result, "dtype") and is_object_dtype(result.dtype):
             try:
-                if filter_type is None or filter_type == "numeric":
+                if filter_type is None:
                     result = result.astype(np.float64)
                 elif filter_type == "bool" and notna(result).all():
                     result = result.astype(np.bool_)
             except (ValueError, TypeError):

                 # try to coerce to the original dtypes item by item if we can
                 if axis == 0:
-                    result = coerce_to_dtypes(result, self.dtypes)
+                    result = coerce_to_dtypes(result, data.dtypes)

         if constructor is not None:
             result = self._constructor_sliced(result, index=labels)
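
The relocated np.bool_ workaround matters for boolean reductions over both axes of object-dtype data, where NumPy can return a non-boolean scalar (numpy issue 10489). An illustrative check, not taken from the PR itself:

    import pandas as pd

    df = pd.DataFrame({"a": [True, True], "b": ["x", "y"]})  # "b" is object dtype
    result = df.all(axis=None)  # reduces over both axes, filter_type="bool"
    print(result, type(result))  # True <class 'numpy.bool_'>
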
76 changes: 12 additions & 64 deletions pandas/core/generic.py
@@ -30,7 +30,7 @@

 from pandas._config import config

-from pandas._libs import Timestamp, iNaT, lib
+from pandas._libs import Timestamp, lib
 from pandas._typing import (
     Axis,
     FilePathOrBuffer,
@@ -4558,6 +4558,10 @@ def filter(
         >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])),
         ...                   index=['mouse', 'rabbit'],
         ...                   columns=['one', 'two', 'three'])
+        >>> df
+                one  two  three
+        mouse     1    2      3
+        rabbit    4    5      6

         >>> # select columns by name
         >>> df.filter(items=['one', 'three'])
@@ -10102,8 +10106,6 @@ def mad(self, axis=None, skipna=None, level=None):
             desc="minimum",
             accum_func=np.minimum.accumulate,
             accum_func_name="min",
-            mask_a=np.inf,
-            mask_b=np.nan,
             examples=_cummin_examples,
         )
         cls.cumsum = _make_cum_function(
@@ -10115,8 +10117,6 @@ def mad(self, axis=None, skipna=None, level=None):
             desc="sum",
             accum_func=np.cumsum,
             accum_func_name="sum",
-            mask_a=0.0,
-            mask_b=np.nan,
             examples=_cumsum_examples,
         )
         cls.cumprod = _make_cum_function(
@@ -10128,8 +10128,6 @@ def mad(self, axis=None, skipna=None, level=None):
             desc="product",
             accum_func=np.cumprod,
             accum_func_name="prod",
-            mask_a=1.0,
-            mask_b=np.nan,
             examples=_cumprod_examples,
         )
         cls.cummax = _make_cum_function(
@@ -10141,8 +10139,6 @@ def mad(self, axis=None, skipna=None, level=None):
             desc="maximum",
             accum_func=np.maximum.accumulate,
             accum_func_name="max",
-            mask_a=-np.inf,
-            mask_b=np.nan,
             examples=_cummax_examples,
         )

@@ -11182,8 +11178,6 @@ def _make_cum_function(
     desc: str,
     accum_func: Callable,
     accum_func_name: str,
-    mask_a: float,
-    mask_b: float,
     examples: str,
 ) -> Callable:
     @Substitution(
@@ -11205,61 +11199,15 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):
         if axis == 1:
             return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T

-        def na_accum_func(blk_values):
-            # We will be applying this function to block values
-            if blk_values.dtype.kind in ["m", "M"]:
-                # GH#30460, GH#29058
-                # numpy 1.18 started sorting NaTs at the end instead of beginning,
-                # so we need to work around to maintain backwards-consistency.
-                orig_dtype = blk_values.dtype
-
-                # We need to define mask before masking NaTs
-                mask = isna(blk_values)
-
-                if accum_func == np.minimum.accumulate:
-                    # Note: the accum_func comparison fails as an "is" comparison
-                    y = blk_values.view("i8")
-                    y[mask] = np.iinfo(np.int64).max
-                    changed = True
-                else:
-                    y = blk_values
-                    changed = False
-
-                result = accum_func(y.view("i8"), axis)
-                if skipna:
-                    np.putmask(result, mask, iNaT)
-                elif accum_func == np.minimum.accumulate:
-                    # Restore NaTs that we masked previously
-                    nz = (~np.asarray(mask)).nonzero()[0]
-                    if len(nz):
-                        # everything up to the first non-na entry stays NaT
-                        result[: nz[0]] = iNaT
-
-                if changed:
-                    # restore NaT elements
-                    y[mask] = iNaT  # TODO: could try/finally for this?
-
-                if isinstance(blk_values, np.ndarray):
-                    result = result.view(orig_dtype)
-                else:
-                    # DatetimeArray
-                    result = type(blk_values)._from_sequence(result, dtype=orig_dtype)
-
-            elif skipna and not issubclass(
-                blk_values.dtype.type, (np.integer, np.bool_)
-            ):
-                vals = blk_values.copy().T
-                mask = isna(vals)
-                np.putmask(vals, mask, mask_a)
-                result = accum_func(vals, axis)
-                np.putmask(result, mask, mask_b)
-            else:
-                result = accum_func(blk_values.T, axis)
+        def block_accum_func(blk_values):
+            values = blk_values.T if hasattr(blk_values, "T") else blk_values
+
+            result = nanops.na_accum_func(values, accum_func, skipna=skipna)

-            # transpose back for ndarray, not for EA
-            return result.T if hasattr(result, "T") else result
+            result = result.T if hasattr(result, "T") else result
+            return result

-        result = self._data.apply(na_accum_func)
+        result = self._data.apply(block_accum_func)

         d = self._construct_axes_dict()
         d["copy"] = False
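
The mask_a/mask_b constants disappear because NaN/NaT handling for all four accumulators now lives in nanops.na_accum_func. A sketch of the behavior that moved, using cummin over datetimes with a missing value (output shown as a comment):

    import pandas as pd

    s = pd.Series(pd.to_datetime(["2020-01-02", None, "2020-01-01"]))
    print(s.cummin())
    # 0   2020-01-02
    # 1          NaT        <- skipna=True leaves the missing slot as NaT
    # 2   2020-01-01        <- running minimum ignores the NaT
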
4 changes: 1 addition & 3 deletions pandas/core/indexes/base.py
@@ -3281,13 +3281,11 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
         target = _ensure_has_len(target)  # target may be an iterator

         if not isinstance(target, Index) and len(target) == 0:
-            attrs = self._get_attributes_dict()
-            attrs.pop("freq", None)  # don't preserve freq
             if isinstance(self, ABCRangeIndex):
                 values = range(0)
             else:
                 values = self._data[:0]  # appropriately-dtyped empty array
-            target = self._simple_new(values, **attrs)
+            target = self._simple_new(values, name=self.name)
         else:
             target = ensure_index(target)

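
Dropping the attrs dict means only the name is propagated when reindexing against an empty target. A minimal illustration (Index.reindex returns the new index plus an indexer):

    import pandas as pd

    idx = pd.Index([10, 20, 30], name="when")
    target, indexer = idx.reindex([])
    print(target.name, len(target))  # when 0
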
14 changes: 7 additions & 7 deletions pandas/core/indexes/category.py
@@ -396,7 +396,7 @@ def _engine(self):
     def unique(self, level=None):
         if level is not None:
             self._validate_index_level(level)
-        result = self.values.unique()
+        result = self._values.unique()
         # Use _simple_new instead of _shallow_copy to ensure we keep dtype
         #  of result, not self.
         return type(self)._simple_new(result, name=self.name)
@@ -423,7 +423,7 @@ def where(self, cond, other=None):
         # 3. Rebuild CategoricalIndex.
         if other is None:
             other = self._na_value
-        values = np.where(cond, self.values, other)
+        values = np.where(cond, self._values, other)
         cat = Categorical(values, dtype=self.dtype)
         return type(self)._simple_new(cat, name=self.name)

@@ -532,13 +532,13 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
                 "method='nearest' not implemented yet for CategoricalIndex"
             )

-        if isinstance(target, CategoricalIndex) and self.values.is_dtype_equal(target):
-            if self.values.equals(target.values):
+        if isinstance(target, CategoricalIndex) and self._values.is_dtype_equal(target):
+            if self._values.equals(target._values):
                 # we have the same codes
                 codes = target.codes
             else:
                 codes = _recode_for_categories(
-                    target.codes, target.categories, self.values.categories
+                    target.codes, target.categories, self._values.categories
                 )
         else:
             if isinstance(target, CategoricalIndex):
@@ -560,7 +560,7 @@ def get_indexer_non_unique(self, target):
             target = target.codes
             indexer, missing = self._engine.get_indexer_non_unique(target)
             return ensure_platform_int(indexer), missing
-        target = target.values
+        target = target._values

         codes = self.categories.get_indexer(target)
         indexer, missing = self._engine.get_indexer_non_unique(codes)
@@ -679,7 +679,7 @@ def map(self, mapper):
         >>> idx.map({'a': 'first', 'b': 'second'})
         Index(['first', 'second', nan], dtype='object')
         """
-        return self._shallow_copy_with_infer(self.values.map(mapper))
+        return self._shallow_copy_with_infer(self._values.map(mapper))

     def delete(self, loc):
         """
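
These category.py changes consistently reach for ._values, which on a CategoricalIndex is the underlying Categorical itself, so unique and map keep categorical semantics instead of going through a materialized ndarray. A small illustration:

    import pandas as pd

    ci = pd.CategoricalIndex(["a", "b", "a"], name="letters")

    print(ci.unique())               # CategoricalIndex(['a', 'b'], ...)
    print(ci.map({"a": 1, "b": 2}))  # Index([1, 2, 1], ...)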