CLN: avoid values_from_object in Series (#32426)

jbrockmendel · web-flow · commit 8c38283cbb9c · 2020-03-10T22:29:34.000-04:00
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -77,6 +77,7 @@
     ensure_platform_int,
     infer_dtype_from_object,
     is_bool_dtype,
+    is_datetime64_any_dtype,
     is_dict_like,
     is_dtype_equal,
     is_extension_array_dtype,
@@ -88,6 +89,7 @@
     is_list_like,
     is_named_tuple,
     is_object_dtype,
+    is_period_dtype,
     is_scalar,
     is_sequence,
     needs_i8_conversion,
@@ -7789,11 +7791,13 @@ def _reduce(
         self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
     ):
 
-        dtype_is_dt = self.dtypes.apply(lambda x: x.kind == "M")
+        dtype_is_dt = self.dtypes.apply(
+            lambda x: is_datetime64_any_dtype(x) or is_period_dtype(x)
+        )
         if numeric_only is None and name in ["mean", "median"] and dtype_is_dt.any():
             warnings.warn(
                 "DataFrame.mean and DataFrame.median with numeric_only=None "
-                "will include datetime64 and datetime64tz columns in a "
+                "will include datetime64, datetime64tz, and PeriodDtype columns in a "
                 "future version.",
                 FutureWarning,
                 stacklevel=3,
@@ -7854,6 +7858,10 @@ def blk_func(values):
                 assert len(res) == max(list(res.keys())) + 1, res.keys()
             out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype)
             out.index = df.columns
+            if axis == 0 and df.dtypes.apply(needs_i8_conversion).any():
+                # FIXME: needs_i8_conversion check is kludge, not sure
+                #  why it is necessary in this case and this case alone
+                out[:] = coerce_to_dtypes(out.values, df.dtypes)
             return out
 
         if numeric_only is None:
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
@@ -7,7 +7,7 @@
 
 from pandas._config import get_option
 
-from pandas._libs import NaT, Timedelta, Timestamp, iNaT, lib
+from pandas._libs import NaT, Period, Timedelta, Timestamp, iNaT, lib
 from pandas._typing import Dtype, Scalar
 from pandas.compat._optional import import_optional_dependency
 
@@ -17,9 +17,7 @@
     is_any_int_dtype,
     is_bool_dtype,
     is_complex,
-    is_datetime64_dtype,
-    is_datetime64tz_dtype,
-    is_datetime_or_timedelta_dtype,
+    is_datetime64_any_dtype,
     is_float,
     is_float_dtype,
     is_integer,
@@ -28,8 +26,10 @@
     is_object_dtype,
     is_scalar,
     is_timedelta64_dtype,
+    needs_i8_conversion,
     pandas_dtype,
 )
+from pandas.core.dtypes.dtypes import PeriodDtype
 from pandas.core.dtypes.missing import isna, na_value_for_dtype, notna
 
 from pandas.core.construction import extract_array
@@ -134,10 +134,8 @@ def f(
 
 
 def _bn_ok_dtype(dtype: Dtype, name: str) -> bool:
-    # Bottleneck chokes on datetime64
-    if not is_object_dtype(dtype) and not (
-        is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype)
-    ):
+    # Bottleneck chokes on datetime64, PeriodDtype (or and EA)
+    if not is_object_dtype(dtype) and not needs_i8_conversion(dtype):
 
         # GH 15507
         # bottleneck does not properly upcast during the sum
@@ -283,17 +281,16 @@ def _get_values(
     #  with scalar fill_value.  This guarantee is important for the
     #  maybe_upcast_putmask call below
     assert is_scalar(fill_value)
+    values = extract_array(values, extract_numpy=True)
 
     mask = _maybe_get_mask(values, skipna, mask)
 
-    values = extract_array(values, extract_numpy=True)
     dtype = values.dtype
 
-    if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
+    if needs_i8_conversion(values):
         # changing timedelta64/datetime64 to int64 needs to happen after
         #  finding `mask` above
-        values = getattr(values, "asi8", values)
-        values = values.view(np.int64)
+        values = np.asarray(values.view("i8"))
 
     dtype_ok = _na_ok_dtype(dtype)
 
@@ -307,7 +304,8 @@ def _get_values(
 
     if skipna and copy:
         values = values.copy()
-        if dtype_ok:
+        assert mask is not None  # for mypy
+        if dtype_ok and mask.any():
             np.putmask(values, mask, fill_value)
 
         # promote if needed
@@ -325,13 +323,14 @@ def _get_values(
 
 
 def _na_ok_dtype(dtype) -> bool:
-    # TODO: what about datetime64tz?  PeriodDtype?
-    return not issubclass(dtype.type, (np.integer, np.timedelta64, np.datetime64))
+    if needs_i8_conversion(dtype):
+        return False
+    return not issubclass(dtype.type, np.integer)
 
 
 def _wrap_results(result, dtype: Dtype, fill_value=None):
     """ wrap our results if needed """
-    if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
+    if is_datetime64_any_dtype(dtype):
         if fill_value is None:
             # GH#24293
             fill_value = iNaT
@@ -342,7 +341,8 @@ def _wrap_results(result, dtype: Dtype, fill_value=None):
                 result = np.nan
             result = Timestamp(result, tz=tz)
         else:
-            result = result.view(dtype)
+            # If we have float dtype, taking a view will give the wrong result
+            result = result.astype(dtype)
     elif is_timedelta64_dtype(dtype):
         if not isinstance(result, np.ndarray):
             if result == fill_value:
@@ -356,6 +356,14 @@ def _wrap_results(result, dtype: Dtype, fill_value=None):
         else:
             result = result.astype("m8[ns]").view(dtype)
 
+    elif isinstance(dtype, PeriodDtype):
+        if is_float(result) and result.is_integer():
+            result = int(result)
+        if is_integer(result):
+            result = Period._from_ordinal(result, freq=dtype.freq)
+        else:
+            raise NotImplementedError(type(result), result)
+
     return result
 
 
@@ -542,12 +550,7 @@ def nanmean(values, axis=None, skipna=True, mask=None):
     )
     dtype_sum = dtype_max
     dtype_count = np.float64
-    if (
-        is_integer_dtype(dtype)
-        or is_timedelta64_dtype(dtype)
-        or is_datetime64_dtype(dtype)
-        or is_datetime64tz_dtype(dtype)
-    ):
+    if is_integer_dtype(dtype) or needs_i8_conversion(dtype):
         dtype_sum = np.float64
     elif is_float_dtype(dtype):
         dtype_sum = dtype
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1985,7 +1985,7 @@ def idxmin(self, axis=0, skipna=True, *args, **kwargs):
         nan
         """
         skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
-        i = nanops.nanargmin(com.values_from_object(self), skipna=skipna)
+        i = nanops.nanargmin(self._values, skipna=skipna)
         if i == -1:
             return np.nan
         return self.index[i]
@@ -2056,7 +2056,7 @@ def idxmax(self, axis=0, skipna=True, *args, **kwargs):
         nan
         """
         skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)
-        i = nanops.nanargmax(com.values_from_object(self), skipna=skipna)
+        i = nanops.nanargmax(self._values, skipna=skipna)
         if i == -1:
             return np.nan
         return self.index[i]
@@ -2094,7 +2094,7 @@ def round(self, decimals=0, *args, **kwargs) -> "Series":
         dtype: float64
         """
         nv.validate_round(args, kwargs)
-        result = com.values_from_object(self).round(decimals)
+        result = self._values.round(decimals)
         result = self._constructor(result, index=self.index).__finalize__(self)
 
         return result
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -856,11 +856,6 @@ def test_mean_datetimelike(self):
         expected = pd.Series({"A": 1.0, "C": df.loc[1, "C"]})
         tm.assert_series_equal(result, expected)
 
-    @pytest.mark.xfail(
-        reason="casts to object-dtype and then tries to add timestamps",
-        raises=TypeError,
-        strict=True,
-    )
     def test_mean_datetimelike_numeric_only_false(self):
         df = pd.DataFrame(
             {