From 90fb20e1fc3153954d7b2438ea189b31e907e5ae Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sat, 7 Jul 2018 09:39:41 -0500
Subject: [PATCH 01/23] ENH: add groupby & reduce support to EA

closes #21789
closes #22346
---
 doc/source/whatsnew/v0.24.0.txt         |  9 ++++-
 pandas/conftest.py                      | 23 ++++++++++++
 pandas/core/arrays/base.py              | 29 +++++++++++++++
 pandas/core/arrays/integer.py           | 49 +++++++++++++++++++++++++
 pandas/tests/arrays/test_integer.py     |  9 +++--
 pandas/tests/extension/base/__init__.py |  1 +
 pandas/tests/extension/base/groupby.py  |  8 ++--
 pandas/tests/extension/base/reduce.py   | 38 +++++++++++++++++++
 pandas/tests/extension/test_integer.py  | 48 +++++++++++++++++-------
 9 files changed, 193 insertions(+), 21 deletions(-)
 create mode 100644 pandas/tests/extension/base/reduce.py

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 3d82dd042da20..b241883dc0d2a 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -48,7 +48,7 @@ Pandas has gained the ability to hold integer dtypes with missing values. This l
 Here is an example of the usage.
 
 We can construct a ``Series`` with the specified dtype. The dtype string ``Int64`` is a pandas ``ExtensionDtype``. Specifying a list or array using the traditional missing value
-marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`, :issue:`22441`)
+marker of ``np.nan`` will infer to integer dtype. The display of the ``Series`` will also use the ``NaN`` to indicate missing values in string outputs. (:issue:`20700`, :issue:`20747`, :issue:`22441`, :issue:`21789`, :issue:`22346`)
 
 .. ipython:: python
 
@@ -91,6 +91,13 @@ These dtypes can be merged & reshaped & casted.
    pd.concat([df[['A']], df[['B', 'C']]], axis=1).dtypes
    df['A'].astype(float)
 
+Reduction and groupby operations such as 'sum' work.
+
+.. ipython:: python
+
+   df.sum()
+   df.groupby('B').A.sum()
+
 .. warning::
 
    The Integer NA support currently uses the captilized dtype version, e.g. ``Int8`` as compared to the traditional ``int8``. This may be changed at a future date.
diff --git a/pandas/conftest.py b/pandas/conftest.py
index 621de3ffd4b12..d24400d09809e 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -131,6 +131,29 @@ def all_arithmetic_operators(request):
     return request.param
 
 
+_all_numeric_reductions = ['sum', 'max', 'min',
+                           'mean', 'prod', 'std', 'var', 'median']
+
+
+@pytest.fixture(params=_all_numeric_reductions)
+def all_numeric_reductions(request):
+    """
+    Fixture for numeric reduction names
+    """
+    return request.param
+
+
+_all_boolean_reductions = ['all', 'any']
+
+
+@pytest.fixture(params=_all_boolean_reductions)
+def all_boolean_reductions(request):
+    """
+    Fixture for boolean reduction names
+    """
+    return request.param
+
+
 _cython_table = pd.core.base.SelectionMixin._cython_table.items()
 
 
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 627afd1b6f860..fe07b302eb02b 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -59,6 +59,10 @@ class ExtensionArray(object):
     * factorize / _values_for_factorize
     * argsort / _values_for_argsort
 
+    One can implement methods to handle array reductions.
+
+    * _reduce
+
     The remaining methods implemented on this class should be performant,
     as they only compose abstract methods. Still, a more efficient
     implementation may be available, and these methods can be overridden.
@@ -713,6 +717,31 @@ def _add_comparison_ops(cls):
         cls.__le__ = cls._create_comparison_method(operator.le)
         cls.__ge__ = cls._create_comparison_method(operator.ge)
 
+    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
+                filter_type=None, **kwargs):
+        """Return a scalar result of performing the op
+
+        Parameters
+        ----------
+        op : callable
+            function to apply to the array
+        name : str
+            name of the function
+        axis : int, default 0
+            axis over which to apply, defined as 0 currently
+        skipna : bool, default True
+            if True, skip NaN values
+        numeric_only : bool, optional
+            if True, only perform numeric ops
+        filter_type : str, optional
+        kwargs : dict
+
+        Returns
+        -------
+        scalar
+        """
+        raise AbstractMethodError(self)
+
 
 class ExtensionScalarOpsMixin(ExtensionOpsMixin):
     """A mixin for defining the arithmetic and logical operations on
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index e58109a25e1a5..7e1e7315232aa 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -529,6 +529,55 @@ def cmp_method(self, other):
         name = '__{name}__'.format(name=op.__name__)
         return set_function_name(cmp_method, name, cls)
 
+    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
+                filter_type=None, **kwds):
+        """Return a scalar result of performing the op
+
+        Parameters
+        ----------
+        op : callable
+            function to apply to the array
+        name : str
+            name of the function
+        axis : int, default 0
+            axis over which to apply, defined as 0 currently
+        skipna : bool, default True
+            if True, skip NaN values
+        numeric_only : bool, optional
+            if True, only perform numeric ops
+        filter_type : str, optional
+        kwds : dict
+
+        Returns
+        -------
+        scalar
+        """
+
+        data = self._data
+        mask = self._mask
+
+        # coerce to a nan-aware float if needed
+        if mask.any():
+            data = self._data.astype('float64')
+            data[mask] = self._na_value
+
+        result = op(data, axis=axis, skipna=skipna)
+
+        # if we have a boolean op, provide coercion back to a bool
+        # type if possible
+        if name in ['any', 'all']:
+            if is_integer(result) or is_float(result):
+                result = bool(int(result))
+
+        # if we have a numeric op, provide coercion back to an integer
+        # type if possible
+        elif not isna(result):
+            int_result = int(result)
+            if int_result == result:
+                result = int_result
+
+        return result
+
     def _maybe_mask_result(self, result, mask, other, op_name):
         """
         Parameters
diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
index 349a6aee5701e..3f3653f375149 100644
--- a/pandas/tests/arrays/test_integer.py
+++ b/pandas/tests/arrays/test_integer.py
@@ -587,15 +587,18 @@ def test_cross_type_arithmetic():
     tm.assert_series_equal(result, expected)
 
 
-def test_groupby_mean_included():
+@pytest.mark.parametrize('op', ['sum', 'min', 'max'])
+def test_preserve_groupby_dtypes(op):
+    # TODO(#22346): preserve Int64 dtype
+    # for ops that enable (mean would actually work here
+    # but generally it is a float return value)
     df = pd.DataFrame({
         "A": ['a', 'b', 'b'],
         "B": [1, None, 3],
         "C": integer_array([1, None, 3], dtype='Int64'),
     })
 
-    result = df.groupby("A").sum()
-    # TODO(#22346): preserve Int64 dtype
+    result = getattr(df.groupby("A"), op)()
     expected = pd.DataFrame({
         "B": np.array([1.0, 3.0]),
         "C": np.array([1, 3], dtype="int64")
diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py
index b6b81bb941a59..b5d4fd676245e 100644
--- a/pandas/tests/extension/base/__init__.py
+++ b/pandas/tests/extension/base/__init__.py
@@ -48,6 +48,7 @@ class TestMyDtype(BaseDtypeTests):
 from .interface import BaseInterfaceTests  # noqa
 from .methods import BaseMethodsTests  # noqa
 from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil  # noqa
+from .reduce import BaseNumericReduceTests, BaseBooleanReduceTests  # noqa
 from .missing import BaseMissingTests  # noqa
 from .reshaping import BaseReshapingTests  # noqa
 from .setitem import BaseSetitemTests  # noqa
diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py
index 174997c7d51e1..52c635d286df6 100644
--- a/pandas/tests/extension/base/groupby.py
+++ b/pandas/tests/extension/base/groupby.py
@@ -25,8 +25,8 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping):
                            "B": data_for_grouping})
         result = df.groupby("B", as_index=as_index).A.mean()
         _, index = pd.factorize(data_for_grouping, sort=True)
-        # TODO(ExtensionIndex): remove astype
-        index = pd.Index(index.astype(object), name="B")
+
+        index = pd.Index(index, name="B")
         expected = pd.Series([3, 1, 4], index=index, name="A")
         if as_index:
             self.assert_series_equal(result, expected)
@@ -39,8 +39,8 @@ def test_groupby_extension_no_sort(self, data_for_grouping):
                            "B": data_for_grouping})
         result = df.groupby("B", sort=False).A.mean()
         _, index = pd.factorize(data_for_grouping, sort=False)
-        # TODO(ExtensionIndex): remove astype
-        index = pd.Index(index.astype(object), name="B")
+
+        index = pd.Index(index, name="B")
         expected = pd.Series([1, 3, 4], index=index, name="A")
         self.assert_series_equal(result, expected)
 
diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py
new file mode 100644
index 0000000000000..ce53ba91ff2fc
--- /dev/null
+++ b/pandas/tests/extension/base/reduce.py
@@ -0,0 +1,38 @@
+import warnings
+import pytest
+import pandas.util.testing as tm
+import pandas as pd
+from .base import BaseExtensionTests
+
+
+class BaseReduceTests(BaseExtensionTests):
+    """
+    Reduction specific tests. Generally these only
+    make sense for numeric/boolean operations.
+    """
+    def check_reduce(self, s, op_name, skipna):
+        result = getattr(s, op_name)(skipna=skipna)
+        expected = getattr(s.astype('float64'), op_name)(skipna=skipna)
+        tm.assert_almost_equal(result, expected)
+
+
+class BaseNumericReduceTests(BaseReduceTests):
+
+    @pytest.mark.parametrize('skipna', [True, False])
+    def test_reduce_series(self, data, all_numeric_reductions, skipna):
+        op_name = all_numeric_reductions
+        s = pd.Series(data)
+
+        # min/max with empty produce numpy warnings
+        with warnings.catch_warnings(record=True):
+            warnings.simplefilter("ignore", RuntimeWarning)
+            self.check_reduce(s, op_name, skipna)
+
+
+class BaseBooleanReduceTests(BaseReduceTests):
+
+    @pytest.mark.parametrize('skipna', [True, False])
+    def test_reduce_series(self, data, all_boolean_reductions, skipna):
+        op_name = all_boolean_reductions
+        s = pd.Series(data)
+        self.check_reduce(s, op_name, skipna)
diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
index fa5c89d85e548..163f2ad6fc72b 100644
--- a/pandas/tests/extension/test_integer.py
+++ b/pandas/tests/extension/test_integer.py
@@ -208,17 +208,39 @@ class TestCasting(base.BaseCastingTests):
 
 class TestGroupby(base.BaseGroupbyTests):
 
-    @pytest.mark.xfail(reason="groupby not working", strict=True)
-    def test_groupby_extension_no_sort(self, data_for_grouping):
-        super(TestGroupby, self).test_groupby_extension_no_sort(
-            data_for_grouping)
-
-    @pytest.mark.parametrize('as_index', [
-        pytest.param(True,
-                     marks=pytest.mark.xfail(reason="groupby not working",
-                                             strict=True)),
-        False
-    ])
+    @pytest.mark.parametrize('as_index', [True, False])
     def test_groupby_extension_agg(self, as_index, data_for_grouping):
-        super(TestGroupby, self).test_groupby_extension_agg(
-            as_index, data_for_grouping)
+        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
+                           "B": data_for_grouping})
+        result = df.groupby("B", as_index=as_index).A.mean()
+        _, index = pd.factorize(data_for_grouping, sort=True)
+
+        # TODO(ExtensionIndex): remove coercion to object
+        # we don't have an easy way to represent an EA as an Index object
+        index = pd.Index(index, name="B", dtype=object)
+        expected = pd.Series([3, 1, 4], index=index, name="A")
+        if as_index:
+            self.assert_series_equal(result, expected)
+        else:
+            expected = expected.reset_index()
+            self.assert_frame_equal(result, expected)
+
+    def test_groupby_extension_no_sort(self, data_for_grouping):
+        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
+                           "B": data_for_grouping})
+        result = df.groupby("B", sort=False).A.mean()
+        _, index = pd.factorize(data_for_grouping, sort=False)
+
+        # TODO(ExtensionIndex): remove coercion to object
+        # we don't have an easy way to represent an EA as an Index object
+        index = pd.Index(index, name="B", dtype=object)
+        expected = pd.Series([1, 3, 4], index=index, name="A")
+        self.assert_series_equal(result, expected)
+
+
+class TestNumericReduce(base.BaseNumericReduceTests):
+    pass
+
+
+class TestBooleanReduce(base.BaseBooleanReduceTests):
+    pass

From 63899aadae90ba05431eaa111d18ef6f6e3c764f Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Wed, 19 Sep 2018 10:32:27 -0400
Subject: [PATCH 02/23] don't need record=True

---
 pandas/tests/extension/base/reduce.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py
index ce53ba91ff2fc..5f3db6957bc07 100644
--- a/pandas/tests/extension/base/reduce.py
+++ b/pandas/tests/extension/base/reduce.py
@@ -24,7 +24,7 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna):
         s = pd.Series(data)
 
         # min/max with empty produce numpy warnings
-        with warnings.catch_warnings(record=True):
+        with warnings.catch_warnings():
             warnings.simplefilter("ignore", RuntimeWarning)
             self.check_reduce(s, op_name, skipna)
 

From 05f27a137b49f5a75b7caf3912ee6b7c9b652660 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Wed, 19 Sep 2018 11:31:42 -0400
Subject: [PATCH 03/23] dispatch to Index types on _reduce Decimal is a numeric
 type, support in groupby

---
 pandas/core/arrays/base.py              |  4 +++-
 pandas/tests/extension/decimal/array.py | 25 +++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index fe07b302eb02b..5f2722a6cbc98 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -740,7 +740,9 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
         -------
         scalar
         """
-        raise AbstractMethodError(self)
+
+        # we dispatchh to the nanops operations
+        return op(self, axis=axis, skipna=skipna)
 
 
 class ExtensionScalarOpsMixin(ExtensionOpsMixin):
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index a1ee3a4fefef2..3186ed43f5e41 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -134,6 +134,31 @@ def _na_value(self):
     def _concat_same_type(cls, to_concat):
         return cls(np.concatenate([x._data for x in to_concat]))
 
+    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
+                filter_type=None, **kwds):
+        """Return a scalar result of performing the op
+
+        Parameters
+        ----------
+        op : callable
+            function to apply to the array
+        name : str
+            name of the function
+        axis : int, default 0
+            axis over which to apply, defined as 0 currently
+        skipna : bool, default True
+            if True, skip NaN values
+        numeric_only : bool, optional
+            if True, only perform numeric ops
+        filter_type : str, optional
+        kwds : dict
+
+        Returns
+        -------
+        scalar
+        """
+        return op(self.data, axis=axis, skipna=skipna)
+
 
 def to_decimal(values, context=None):
     return DecimalArray([decimal.Decimal(x) for x in values], context=context)

From 9dbbe51762fdb8504bbb56f305ede1c784e203f6 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Wed, 19 Sep 2018 12:03:42 -0400
Subject: [PATCH 04/23] handle min/max correctly for datetimelikes

---
 pandas/core/arrays/base.py         |  4 +---
 pandas/core/arrays/datetimelike.py | 36 ++++++++++++++++++++++++++++++
 pandas/core/dtypes/common.py       |  3 ++-
 pandas/tests/dtypes/test_common.py |  2 ++
 4 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 5f2722a6cbc98..fe07b302eb02b 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -740,9 +740,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
         -------
         scalar
         """
-
-        # we dispatchh to the nanops operations
-        return op(self, axis=axis, skipna=skipna)
+        raise AbstractMethodError(self)
 
 
 class ExtensionScalarOpsMixin(ExtensionOpsMixin):
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index e4ace2bfe1509..cc762f5e0aa1f 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -342,6 +342,42 @@ def _validate_frequency(cls, index, freq, **kwargs):
                              'does not conform to passed frequency {passed}'
                              .format(infer=inferred, passed=freq.freqstr))
 
+    # ------------------------------------------------------------------
+    # Reduction Methods
+
+    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
+                filter_type=None, **kwargs):
+        """Return a scalar result of performing the op
+
+        Parameters
+        ----------
+        op : callable
+            function to apply to the array
+        name : str
+            name of the function
+        axis : int, default 0
+            axis over which to apply, defined as 0 currently
+        skipna : bool, default True
+            if True, skip NaN values
+        numeric_only : bool, optional
+            if True, only perform numeric ops
+        filter_type : str, optional
+        kwargs : dict
+
+        Returns
+        -------
+        scalar
+        """
+        # if we have a reduction op already defined, use it
+        # this is important for min/max where tz's must be preserved
+        # and nanops is not geared towards this
+        # TODO(jreback): we are ignoring skipna
+        if hasattr(self, name):
+            return getattr(self, name)()
+
+        # we dispatch to the nanops operations
+        return op(self, axis=axis, skipna=skipna)
+
     # ------------------------------------------------------------------
     # Arithmetic Methods
 
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index a9fc9d13d4ab3..69c47de2c519c 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -1237,7 +1237,8 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype):
     if arr_or_dtype is None:
         return False
     tipo = _get_dtype_type(arr_or_dtype)
-    return issubclass(tipo, (np.datetime64, np.timedelta64))
+    return (issubclass(tipo, (np.datetime64, np.timedelta64)) or
+            is_datetime64tz_dtype(arr_or_dtype))
 
 
 def _is_unorderable_exception(e):
diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
index f87c51a4ee16b..6c23dbb6c257e 100644
--- a/pandas/tests/dtypes/test_common.py
+++ b/pandas/tests/dtypes/test_common.py
@@ -388,6 +388,8 @@ def test_is_datetime_or_timedelta_dtype():
     assert not com.is_datetime_or_timedelta_dtype(np.array(['a', 'b']))
 
     assert com.is_datetime_or_timedelta_dtype(np.datetime64)
+    assert com.is_datetime_or_timedelta_dtype(
+        DatetimeTZDtype("ns", "US/Eastern"))
     assert com.is_datetime_or_timedelta_dtype(np.timedelta64)
     assert com.is_datetime_or_timedelta_dtype(
         np.array([], dtype=np.timedelta64))

From d4c0a3e260608c709db60556e4c287618593c72e Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Wed, 19 Sep 2018 19:13:51 -0400
Subject: [PATCH 05/23] share doc-string with base class

---
 pandas/core/arrays/datetimelike.py | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index cc762f5e0aa1f..8645a135d7c92 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -347,27 +347,6 @@ def _validate_frequency(cls, index, freq, **kwargs):
 
     def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
                 filter_type=None, **kwargs):
-        """Return a scalar result of performing the op
-
-        Parameters
-        ----------
-        op : callable
-            function to apply to the array
-        name : str
-            name of the function
-        axis : int, default 0
-            axis over which to apply, defined as 0 currently
-        skipna : bool, default True
-            if True, skip NaN values
-        numeric_only : bool, optional
-            if True, only perform numeric ops
-        filter_type : str, optional
-        kwargs : dict
-
-        Returns
-        -------
-        scalar
-        """
         # if we have a reduction op already defined, use it
         # this is important for min/max where tz's must be preserved
         # and nanops is not geared towards this

From bb3f37c7ae6ce7604cfdc721054c98406c958509 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Wed, 19 Sep 2018 19:17:13 -0400
Subject: [PATCH 06/23] remove more uneeded doc-strings

---
 pandas/core/arrays/integer.py | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 7e1e7315232aa..0b62865fe3776 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -531,28 +531,6 @@ def cmp_method(self, other):
 
     def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
                 filter_type=None, **kwds):
-        """Return a scalar result of performing the op
-
-        Parameters
-        ----------
-        op : callable
-            function to apply to the array
-        name : str
-            name of the function
-        axis : int, default 0
-            axis over which to apply, defined as 0 currently
-        skipna : bool, default True
-            if True, skip NaN values
-        numeric_only : bool, optional
-            if True, only perform numeric ops
-        filter_type : str, optional
-        kwds : dict
-
-        Returns
-        -------
-        scalar
-        """
-
         data = self._data
         mask = self._mask
 

From e7fbe0f8a15a62c04f9ef2199a2618f220d805dd Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sun, 23 Sep 2018 12:40:17 -0400
Subject: [PATCH 07/23] simplify EA._reduce

---
 pandas/core/arrays/base.py              |  8 +------
 pandas/core/arrays/integer.py           |  5 ++--
 pandas/core/series.py                   | 14 +++++++----
 pandas/tests/extension/decimal/array.py | 32 +++++++------------------
 4 files changed, 23 insertions(+), 36 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index fe07b302eb02b..b7ee5f7150029 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -717,23 +717,17 @@ def _add_comparison_ops(cls):
         cls.__le__ = cls._create_comparison_method(operator.le)
         cls.__ge__ = cls._create_comparison_method(operator.ge)
 
-    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
-                filter_type=None, **kwargs):
+    def _reduce(self, name, skipna=True, **kwargs):
         """Return a scalar result of performing the op
 
         Parameters
         ----------
-        op : callable
-            function to apply to the array
         name : str
             name of the function
         axis : int, default 0
             axis over which to apply, defined as 0 currently
         skipna : bool, default True
             if True, skip NaN values
-        numeric_only : bool, optional
-            if True, only perform numeric ops
-        filter_type : str, optional
         kwargs : dict
 
         Returns
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 0b62865fe3776..0e0c43473c168 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -8,6 +8,7 @@
 from pandas.compat import u, range, string_types
 from pandas.compat import set_function_name
 
+from pandas.core import nanops
 from pandas.core.dtypes.cast import astype_nansafe
 from pandas.core.dtypes.generic import ABCSeries, ABCIndexClass
 from pandas.core.dtypes.common import (
@@ -529,8 +530,7 @@ def cmp_method(self, other):
         name = '__{name}__'.format(name=op.__name__)
         return set_function_name(cmp_method, name, cls)
 
-    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
-                filter_type=None, **kwds):
+    def _reduce(self, name, axis=0, skipna=True, **kwargs):
         data = self._data
         mask = self._mask
 
@@ -539,6 +539,7 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
             data = self._data.astype('float64')
             data[mask] = self._na_value
 
+        op = getattr(nanops, 'nan' + name)
         result = op(data, axis=axis, skipna=skipna)
 
         # if we have a boolean op, provide coercion back to a bool
diff --git a/pandas/core/series.py b/pandas/core/series.py
index a613b22ea9046..b7d827a370299 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3392,10 +3392,16 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
 
         """
         delegate = self._values
-        if isinstance(delegate, np.ndarray):
-            # Validate that 'axis' is consistent with Series's single axis.
-            if axis is not None:
-                self._get_axis_number(axis)
+
+        if axis is not None:
+            self._get_axis_number(axis)
+
+        # dispatch to ExtensionArray interface
+        if isinstance(delegate, ExtensionArray):
+            return delegate._reduce(name, skipna=skipna, **kwds)
+
+        # dispatch to numpy arrays
+        elif isinstance(delegate, np.ndarray):
             if numeric_only:
                 raise NotImplementedError('Series.{0} does not implement '
                                           'numeric_only.'.format(name))
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index 3186ed43f5e41..de3df0403851f 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -134,30 +134,16 @@ def _na_value(self):
     def _concat_same_type(cls, to_concat):
         return cls(np.concatenate([x._data for x in to_concat]))
 
-    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
-                filter_type=None, **kwds):
-        """Return a scalar result of performing the op
-
-        Parameters
-        ----------
-        op : callable
-            function to apply to the array
-        name : str
-            name of the function
-        axis : int, default 0
-            axis over which to apply, defined as 0 currently
-        skipna : bool, default True
-            if True, skip NaN values
-        numeric_only : bool, optional
-            if True, only perform numeric ops
-        filter_type : str, optional
-        kwds : dict
+    def _reduce(self, name, axis=0, skipna=True, **kwargs):
 
-        Returns
-        -------
-        scalar
-        """
-        return op(self.data, axis=axis, skipna=skipna)
+        # select the nan* ops
+        if skipna:
+            op = getattr(self.data, 'nan' + name)
+
+        # don't skip nans
+        else:
+            op = getattr(self.data, name)
+        return op(axis=axis)
 
 
 def to_decimal(values, context=None):

From fddf93821c64b39faa0c344efd4940ce66378dd7 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sun, 23 Sep 2018 15:52:35 -0400
Subject: [PATCH 08/23] fix categorical

---
 pandas/core/arrays/categorical.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 216bccf7d6309..79070bbbfd11a 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -2069,14 +2069,12 @@ def _reverse_indexer(self):
         return result
 
     # reduction ops #
-    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
-                filter_type=None, **kwds):
-        """ perform the reduction type operation """
+    def _reduce(self, name, axis=0, skipna=True, **kwargs):
         func = getattr(self, name, None)
         if func is None:
             msg = 'Categorical cannot perform the operation {op}'
             raise TypeError(msg.format(op=name))
-        return func(numeric_only=numeric_only, **kwds)
+        return func(**kwargs)
 
     def min(self, numeric_only=None, **kwargs):
         """ The minimum value of the object.

From 49efedfa3dfc19468a389f2ca6a04b49e1a9e420 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Mon, 1 Oct 2018 08:44:41 -0400
Subject: [PATCH 09/23] make _reduce an always defined method on Base; raise
 TypeError if invoked and not overriden, test the same

---
 pandas/core/arrays/base.py                    | 42 ++++++++++---------
 pandas/tests/extension/arrow/test_bool.py     |  4 ++
 pandas/tests/extension/base/__init__.py       |  2 +-
 pandas/tests/extension/base/reduce.py         | 20 +++++++++
 pandas/tests/extension/decimal/array.py       |  9 ++--
 .../tests/extension/decimal/test_decimal.py   | 22 ++++++++++
 pandas/tests/extension/json/test_json.py      |  4 ++
 pandas/tests/extension/test_categorical.py    |  4 ++
 pandas/tests/extension/test_interval.py       |  4 ++
 9 files changed, 87 insertions(+), 24 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index b7ee5f7150029..01a580b52ab3f 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -679,6 +679,29 @@ def _ndarray_values(self):
         """
         return np.array(self)
 
+    def _reduce(self, name, skipna=True, **kwargs):
+        """Return a scalar result of performing the op
+
+        Parameters
+        ----------
+        name : str
+            name of the function
+        axis : int, default 0
+            axis over which to apply, defined as 0 currently
+        skipna : bool, default True
+            if True, skip NaN values
+        kwargs : dict
+
+        Returns
+        -------
+        scalar
+
+        Raises
+        ------
+        TypeError : subclass does not define reductions
+        """
+        raise TypeError
+
 
 class ExtensionOpsMixin(object):
     """
@@ -717,25 +740,6 @@ def _add_comparison_ops(cls):
         cls.__le__ = cls._create_comparison_method(operator.le)
         cls.__ge__ = cls._create_comparison_method(operator.ge)
 
-    def _reduce(self, name, skipna=True, **kwargs):
-        """Return a scalar result of performing the op
-
-        Parameters
-        ----------
-        name : str
-            name of the function
-        axis : int, default 0
-            axis over which to apply, defined as 0 currently
-        skipna : bool, default True
-            if True, skip NaN values
-        kwargs : dict
-
-        Returns
-        -------
-        scalar
-        """
-        raise AbstractMethodError(self)
-
 
 class ExtensionScalarOpsMixin(ExtensionOpsMixin):
     """A mixin for defining the arithmetic and logical operations on
diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py
index e1afedcade3ff..12c37d1fdf895 100644
--- a/pandas/tests/extension/arrow/test_bool.py
+++ b/pandas/tests/extension/arrow/test_bool.py
@@ -39,6 +39,10 @@ def test_from_dtype(self, data):
         pytest.skip("GH-22666")
 
 
+class TestReduce(base.BaseNoReduceTests):
+    pass
+
+
 def test_is_bool_dtype(data):
     assert pd.api.types.is_bool_dtype(data)
     assert pd.core.common.is_bool_indexer(data)
diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py
index b5d4fd676245e..d11bb8b6beb77 100644
--- a/pandas/tests/extension/base/__init__.py
+++ b/pandas/tests/extension/base/__init__.py
@@ -48,7 +48,7 @@ class TestMyDtype(BaseDtypeTests):
 from .interface import BaseInterfaceTests  # noqa
 from .methods import BaseMethodsTests  # noqa
 from .ops import BaseArithmeticOpsTests, BaseComparisonOpsTests, BaseOpsUtil  # noqa
-from .reduce import BaseNumericReduceTests, BaseBooleanReduceTests  # noqa
+from .reduce import BaseNoReduceTests, BaseNumericReduceTests, BaseBooleanReduceTests  # noqa
 from .missing import BaseMissingTests  # noqa
 from .reshaping import BaseReshapingTests  # noqa
 from .setitem import BaseSetitemTests  # noqa
diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py
index 5f3db6957bc07..4f6c7988314c0 100644
--- a/pandas/tests/extension/base/reduce.py
+++ b/pandas/tests/extension/base/reduce.py
@@ -16,6 +16,26 @@ def check_reduce(self, s, op_name, skipna):
         tm.assert_almost_equal(result, expected)
 
 
+class BaseNoReduceTests(BaseReduceTests):
+    """ we don't define any reductions """
+
+    @pytest.mark.parametrize('skipna', [True, False])
+    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
+        op_name = all_numeric_reductions
+        s = pd.Series(data)
+
+        with pytest.raises(TypeError):
+            getattr(s, op_name)(skipna=skipna)
+
+    @pytest.mark.parametrize('skipna', [True, False])
+    def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
+        op_name = all_boolean_reductions
+        s = pd.Series(data)
+
+        with pytest.raises(TypeError):
+            getattr(s, op_name)(skipna=skipna)
+
+
 class BaseNumericReduceTests(BaseReduceTests):
 
     @pytest.mark.parametrize('skipna', [True, False])
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index de3df0403851f..d72756a254110 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -136,13 +136,14 @@ def _concat_same_type(cls, to_concat):
 
     def _reduce(self, name, axis=0, skipna=True, **kwargs):
 
-        # select the nan* ops
         if skipna:
-            op = getattr(self.data, 'nan' + name)
+            raise NotImplementedError("decimal does not support skipna=True")
 
-        # don't skip nans
-        else:
+        try:
             op = getattr(self.data, name)
+        except AttributeError:
+            raise NotImplementedError("decimal does not support "
+                                      "the {} operation".format(name))
         return op(axis=axis)
 
 
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 6724e183a0606..e3c44f5755cc7 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -131,6 +131,28 @@ class TestMissing(BaseDecimal, base.BaseMissingTests):
     pass
 
 
+class Reduce:
+
+    def check_reduce(self, s, op_name, skipna):
+
+        if skipna or op_name in ['median']:
+            with pytest.raises(NotImplementedError):
+                getattr(s, op_name)(skipna=skipna)
+
+        else:
+            result = getattr(s, op_name)(skipna=skipna)
+            expected = getattr(np.asarray(s), op_name)()
+            tm.assert_almost_equal(result, expected)
+
+
+class TestNumericReduce(Reduce, base.BaseNumericReduceTests):
+    pass
+
+
+class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests):
+    pass
+
+
 class TestMethods(BaseDecimal, base.BaseMethodsTests):
     @pytest.mark.parametrize('dropna', [True, False])
     @pytest.mark.xfail(reason="value_counts not implemented yet.")
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index 6c8b12ed865fc..15d99f6c5d2fc 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -160,6 +160,10 @@ def test_fillna_frame(self):
                               reason="Dictionary order unstable")
 
 
+class TestReduce(base.BaseNoReduceTests):
+    pass
+
+
 class TestMethods(BaseJSON, base.BaseMethodsTests):
     @unhashable
     def test_value_counts(self, all_data, dropna):
diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
index f118279c4b915..a4518798aa400 100644
--- a/pandas/tests/extension/test_categorical.py
+++ b/pandas/tests/extension/test_categorical.py
@@ -164,6 +164,10 @@ def test_fillna_limit_backfill(self):
         pass
 
 
+class TestReduce(base.BaseNoReduceTests):
+    pass
+
+
 class TestMethods(base.BaseMethodsTests):
     pass
 
diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py
index 7302c5757d144..183ebea927b10 100644
--- a/pandas/tests/extension/test_interval.py
+++ b/pandas/tests/extension/test_interval.py
@@ -98,6 +98,10 @@ class TestInterface(BaseInterval, base.BaseInterfaceTests):
     pass
 
 
+class TestReduce(base.BaseNoReduceTests):
+    pass
+
+
 class TestMethods(BaseInterval, base.BaseMethodsTests):
 
     @pytest.mark.skip(reason='addition is not defined for intervals')

From 371ab5410eae666fc405f67f909b810b4d63925b Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Wed, 3 Oct 2018 07:28:50 -0400
Subject: [PATCH 10/23] pass mask to nanops

---
 pandas/core/arrays/integer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 0e0c43473c168..76bc315d71484 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -540,7 +540,7 @@ def _reduce(self, name, axis=0, skipna=True, **kwargs):
             data[mask] = self._na_value
 
         op = getattr(nanops, 'nan' + name)
-        result = op(data, axis=axis, skipna=skipna)
+        result = op(data, axis=axis, skipna=skipna, mask=mask)
 
         # if we have a boolean op, provide coercion back to a bool
         # type if possible

From d94b85e1155be23ea9768fdcf8398c858cd47479 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Wed, 3 Oct 2018 08:09:49 -0400
Subject: [PATCH 11/23] review comments

---
 doc/source/whatsnew/v0.24.0.txt                |  1 +
 pandas/conftest.py                             |  3 ++-
 pandas/core/arrays/base.py                     | 10 +++++++---
 pandas/core/arrays/integer.py                  | 12 +++++-------
 pandas/core/series.py                          |  3 +++
 pandas/tests/extension/decimal/array.py        |  4 ++--
 pandas/tests/extension/decimal/test_decimal.py |  2 +-
 7 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index b241883dc0d2a..29b766e616b3b 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -557,6 +557,7 @@ update the ``ExtensionDtype._metadata`` tuple to match the signature of your
 - Added :meth:`pandas.api.types.register_extension_dtype` to register an extension type with pandas (:issue:`22664`)
 - Series backed by an ``ExtensionArray`` now work with :func:`util.hash_pandas_object` (:issue:`23066`)
 - Updated the ``.type`` attribute for ``PeriodDtype``, ``DatetimeTZDtype``, and ``IntervalDtype`` to be instances of the dtype (``Period``, ``Timestamp``, and ``Interval`` respectively) (:issue:`22938`)
+- Support for reduction operations such as ``sum``, ``mean`` via opt-in base class method override (:issue:`22762`)
 
 .. _whatsnew_0240.api.incompatibilities:
 
diff --git a/pandas/conftest.py b/pandas/conftest.py
index d24400d09809e..e84657a79b51a 100644
--- a/pandas/conftest.py
+++ b/pandas/conftest.py
@@ -132,7 +132,8 @@ def all_arithmetic_operators(request):
 
 
 _all_numeric_reductions = ['sum', 'max', 'min',
-                           'mean', 'prod', 'std', 'var', 'median']
+                           'mean', 'prod', 'std', 'var', 'median',
+                           'kurt', 'skew']
 
 
 @pytest.fixture(params=_all_numeric_reductions)
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 01a580b52ab3f..ad46cfc2054ef 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -680,17 +680,20 @@ def _ndarray_values(self):
         return np.array(self)
 
     def _reduce(self, name, skipna=True, **kwargs):
-        """Return a scalar result of performing the op
+        """Return a scalar result of performing the reduction operation.
 
         Parameters
         ----------
         name : str
-            name of the function
+            name of the function, support values are:
+            {any, all, min, max, sum, mean, median, prod,
+             std, var, sem, kurt, skew}
         axis : int, default 0
             axis over which to apply, defined as 0 currently
         skipna : bool, default True
             if True, skip NaN values
         kwargs : dict
+            ddof is the only supported kwarg
 
         Returns
         -------
@@ -700,7 +703,8 @@ def _reduce(self, name, skipna=True, **kwargs):
         ------
         TypeError : subclass does not define reductions
         """
-        raise TypeError
+        raise TypeError("cannot perform {name} with type {dtype}".format(
+            name=name, dtype=self.dtype))
 
 
 class ExtensionOpsMixin(object):
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 76bc315d71484..e31a7571f62d6 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -530,7 +530,7 @@ def cmp_method(self, other):
         name = '__{name}__'.format(name=op.__name__)
         return set_function_name(cmp_method, name, cls)
 
-    def _reduce(self, name, axis=0, skipna=True, **kwargs):
+    def _reduce(self, name, skipna=True, **kwargs):
         data = self._data
         mask = self._mask
 
@@ -540,17 +540,15 @@ def _reduce(self, name, axis=0, skipna=True, **kwargs):
             data[mask] = self._na_value
 
         op = getattr(nanops, 'nan' + name)
-        result = op(data, axis=axis, skipna=skipna, mask=mask)
+        result = op(data, axis=0, skipna=skipna, mask=mask)
 
-        # if we have a boolean op, provide coercion back to a bool
-        # type if possible
+        # if we have a boolean op, don't coerce
         if name in ['any', 'all']:
-            if is_integer(result) or is_float(result):
-                result = bool(int(result))
+            pass
 
         # if we have a numeric op, provide coercion back to an integer
         # type if possible
-        elif not isna(result):
+        elif notna(result):
             int_result = int(result)
             if int_result == result:
                 result = int_result
diff --git a/pandas/core/series.py b/pandas/core/series.py
index b7d827a370299..bff0f9fe25532 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -3408,6 +3408,9 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
             with np.errstate(all='ignore'):
                 return op(delegate, skipna=skipna, **kwds)
 
+        # TODO(EA) dispatch to Index
+        # remove once all internals extension types are
+        # moved to ExtensionArrays
         return delegate._reduce(op=op, name=name, axis=axis, skipna=skipna,
                                 numeric_only=numeric_only,
                                 filter_type=filter_type, **kwds)
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index d72756a254110..53a598559393c 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -134,7 +134,7 @@ def _na_value(self):
     def _concat_same_type(cls, to_concat):
         return cls(np.concatenate([x._data for x in to_concat]))
 
-    def _reduce(self, name, axis=0, skipna=True, **kwargs):
+    def _reduce(self, name, skipna=True, **kwargs):
 
         if skipna:
             raise NotImplementedError("decimal does not support skipna=True")
@@ -144,7 +144,7 @@ def _reduce(self, name, axis=0, skipna=True, **kwargs):
         except AttributeError:
             raise NotImplementedError("decimal does not support "
                                       "the {} operation".format(name))
-        return op(axis=axis)
+        return op(axis=0)
 
 
 def to_decimal(values, context=None):
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index e3c44f5755cc7..9f8ac3b51067c 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -135,7 +135,7 @@ class Reduce:
 
     def check_reduce(self, s, op_name, skipna):
 
-        if skipna or op_name in ['median']:
+        if skipna or op_name in ['median', 'skew', 'kurt']:
             with pytest.raises(NotImplementedError):
                 getattr(s, op_name)(skipna=skipna)
 

From 67747911035998446d741e3d7f034b9aa2611f8a Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Fri, 5 Oct 2018 07:54:21 -0400
Subject: [PATCH 12/23] cleanup

---
 pandas/core/arrays/datetimelike.py | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 8645a135d7c92..e4ace2bfe1509 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -342,21 +342,6 @@ def _validate_frequency(cls, index, freq, **kwargs):
                              'does not conform to passed frequency {passed}'
                              .format(infer=inferred, passed=freq.freqstr))
 
-    # ------------------------------------------------------------------
-    # Reduction Methods
-
-    def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None,
-                filter_type=None, **kwargs):
-        # if we have a reduction op already defined, use it
-        # this is important for min/max where tz's must be preserved
-        # and nanops is not geared towards this
-        # TODO(jreback): we are ignoring skipna
-        if hasattr(self, name):
-            return getattr(self, name)()
-
-        # we dispatch to the nanops operations
-        return op(self, axis=axis, skipna=skipna)
-
     # ------------------------------------------------------------------
     # Arithmetic Methods
 

From 6699080fa41f611b79a11ad67ca8a79ae9cfd001 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Fri, 5 Oct 2018 07:56:32 -0400
Subject: [PATCH 13/23] doc

---
 pandas/core/arrays/base.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index ad46cfc2054ef..14df4d9bf8670 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -688,8 +688,6 @@ def _reduce(self, name, skipna=True, **kwargs):
             name of the function, support values are:
             {any, all, min, max, sum, mean, median, prod,
              std, var, sem, kurt, skew}
-        axis : int, default 0
-            axis over which to apply, defined as 0 currently
         skipna : bool, default True
             if True, skip NaN values
         kwargs : dict

From 29c3cf7788334be62b22ddee19cc8de7b04329e2 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Fri, 5 Oct 2018 08:38:40 -0400
Subject: [PATCH 14/23] lint issue

---
 pandas/tests/extension/decimal/test_decimal.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 9f8ac3b51067c..f84d24295b049 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -131,7 +131,7 @@ class TestMissing(BaseDecimal, base.BaseMissingTests):
     pass
 
 
-class Reduce:
+class Reduce(object):
 
     def check_reduce(self, s, op_name, skipna):
 

From 0fdaf683bf4478ec553468df1499fb6470669101 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sat, 6 Oct 2018 10:07:44 -0400
Subject: [PATCH 15/23] comments

---
 pandas/core/arrays/base.py    | 10 +++++-----
 pandas/core/arrays/integer.py |  6 +++---
 pandas/core/dtypes/common.py  |  3 +--
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 14df4d9bf8670..de07f2318eacd 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -59,14 +59,14 @@ class ExtensionArray(object):
     * factorize / _values_for_factorize
     * argsort / _values_for_argsort
 
-    One can implement methods to handle array reductions.
-
-    * _reduce
-
     The remaining methods implemented on this class should be performant,
     as they only compose abstract methods. Still, a more efficient
     implementation may be available, and these methods can be overridden.
 
+    One can implement methods to handle array reductions.
+
+    * _reduce
+
     This class does not inherit from 'abc.ABCMeta' for performance reasons.
     Methods and properties required by the interface raise
     ``pandas.errors.AbstractMethodError`` and no ``register`` method is
@@ -691,7 +691,7 @@ def _reduce(self, name, skipna=True, **kwargs):
         skipna : bool, default True
             if True, skip NaN values
         kwargs : dict
-            ddof is the only supported kwarg
+            Additional keyword arguments passed to the reduction function
 
         Returns
         -------
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index e31a7571f62d6..6f37526bbf49e 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -546,9 +546,9 @@ def _reduce(self, name, skipna=True, **kwargs):
         if name in ['any', 'all']:
             pass
 
-        # if we have a numeric op, provide coercion back to an integer
-        # type if possible
-        elif notna(result):
+        # if we have a preservable numeric op,
+        # provide coercion back to an integer type if possible
+        elif name in ['sum', 'min', 'max'] and notna(result):
             int_result = int(result)
             if int_result == result:
                 result = int_result
diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index 69c47de2c519c..a9fc9d13d4ab3 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -1237,8 +1237,7 @@ def is_datetime_or_timedelta_dtype(arr_or_dtype):
     if arr_or_dtype is None:
         return False
     tipo = _get_dtype_type(arr_or_dtype)
-    return (issubclass(tipo, (np.datetime64, np.timedelta64)) or
-            is_datetime64tz_dtype(arr_or_dtype))
+    return issubclass(tipo, (np.datetime64, np.timedelta64))
 
 
 def _is_unorderable_exception(e):

From 08f5830bbc13d1093e700865d2fca09caa29d364 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sat, 6 Oct 2018 10:30:37 -0400
Subject: [PATCH 16/23] fix test

---
 pandas/tests/dtypes/test_common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
index 6c23dbb6c257e..882b2c156478a 100644
--- a/pandas/tests/dtypes/test_common.py
+++ b/pandas/tests/dtypes/test_common.py
@@ -386,10 +386,10 @@ def test_is_datetime_or_timedelta_dtype():
     assert not com.is_datetime_or_timedelta_dtype(str)
     assert not com.is_datetime_or_timedelta_dtype(pd.Series([1, 2]))
     assert not com.is_datetime_or_timedelta_dtype(np.array(['a', 'b']))
+    assert not com.is_datetime_or_timedelta_dtype(
+        DatetimeTZDtype("ns", "US/Eastern"))
 
     assert com.is_datetime_or_timedelta_dtype(np.datetime64)
-    assert com.is_datetime_or_timedelta_dtype(
-        DatetimeTZDtype("ns", "US/Eastern"))
     assert com.is_datetime_or_timedelta_dtype(np.timedelta64)
     assert com.is_datetime_or_timedelta_dtype(
         np.array([], dtype=np.timedelta64))

From 3e763c40797d006732e2ac2e31a1bb31c75a226a Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Wed, 10 Oct 2018 20:27:24 -0400
Subject: [PATCH 17/23] comments

---
 pandas/core/arrays/base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index de07f2318eacd..7e2e0da199a01 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -692,6 +692,7 @@ def _reduce(self, name, skipna=True, **kwargs):
             if True, skip NaN values
         kwargs : dict
             Additional keyword arguments passed to the reduction function
+            ddof is the only supported kwarg
 
         Returns
         -------

From b543386929344fef4565f3852e5ab68bbdf7ce00 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Thu, 11 Oct 2018 17:04:39 -0400
Subject: [PATCH 18/23] add op tests for reduction result dtype

---
 pandas/tests/arrays/test_integer.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
index 3f3653f375149..905bca38ac228 100644
--- a/pandas/tests/arrays/test_integer.py
+++ b/pandas/tests/arrays/test_integer.py
@@ -588,7 +588,7 @@ def test_cross_type_arithmetic():
 
 
 @pytest.mark.parametrize('op', ['sum', 'min', 'max'])
-def test_preserve_groupby_dtypes(op):
+def test_preserve_dtypes(op):
     # TODO(#22346): preserve Int64 dtype
     # for ops that enable (mean would actually work here
     # but generally it is a float return value)
@@ -598,6 +598,11 @@ def test_preserve_groupby_dtypes(op):
         "C": integer_array([1, None, 3], dtype='Int64'),
     })
 
+    # op
+    result = getattr(df.C, op)()
+    assert isinstance(result, int)
+
+    # groupby
     result = getattr(df.groupby("A"), op)()
     expected = pd.DataFrame({
         "B": np.array([1.0, 3.0]),

From b808778569ab23fc333afd7ccc17936714207bfa Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Thu, 11 Oct 2018 17:09:29 -0400
Subject: [PATCH 19/23] remove xfails on frame arithmetic ops comparisons

---
 pandas/tests/arrays/test_integer.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
index 905bca38ac228..fdf4bc91f9f77 100644
--- a/pandas/tests/arrays/test_integer.py
+++ b/pandas/tests/arrays/test_integer.py
@@ -114,6 +114,13 @@ def _check_op(self, s, op_name, other, exc=None):
         # compute expected
         mask = s.isna()
 
+        # if s is a DataFrame, squeeze to a Series
+        # for comparison
+        if isinstance(s, pd.DataFrame):
+            result = result.squeeze()
+            s = s.squeeze()
+            mask = mask.squeeze()
+
         # other array is an Integer
         if isinstance(other, IntegerArray):
             omask = getattr(other, 'mask', None)
@@ -215,7 +222,6 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
         s = pd.Series(data)
         self._check_op(s, op, 1, exc=TypeError)
 
-    @pytest.mark.xfail(run=False, reason="_reduce needs implementation")
     def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
         # frame & scalar
         op = all_arithmetic_operators

From 2b3d96f474ce425f49ab6d1caba88caca4729d9d Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Thu, 11 Oct 2018 17:37:27 -0400
Subject: [PATCH 20/23] remove groupby overrides

---
 pandas/tests/extension/test_integer.py | 30 +-------------------------
 1 file changed, 1 insertion(+), 29 deletions(-)

diff --git a/pandas/tests/extension/test_integer.py b/pandas/tests/extension/test_integer.py
index 163f2ad6fc72b..89c36bbe7b325 100644
--- a/pandas/tests/extension/test_integer.py
+++ b/pandas/tests/extension/test_integer.py
@@ -207,35 +207,7 @@ class TestCasting(base.BaseCastingTests):
 
 
 class TestGroupby(base.BaseGroupbyTests):
-
-    @pytest.mark.parametrize('as_index', [True, False])
-    def test_groupby_extension_agg(self, as_index, data_for_grouping):
-        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
-                           "B": data_for_grouping})
-        result = df.groupby("B", as_index=as_index).A.mean()
-        _, index = pd.factorize(data_for_grouping, sort=True)
-
-        # TODO(ExtensionIndex): remove coercion to object
-        # we don't have an easy way to represent an EA as an Index object
-        index = pd.Index(index, name="B", dtype=object)
-        expected = pd.Series([3, 1, 4], index=index, name="A")
-        if as_index:
-            self.assert_series_equal(result, expected)
-        else:
-            expected = expected.reset_index()
-            self.assert_frame_equal(result, expected)
-
-    def test_groupby_extension_no_sort(self, data_for_grouping):
-        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
-                           "B": data_for_grouping})
-        result = df.groupby("B", sort=False).A.mean()
-        _, index = pd.factorize(data_for_grouping, sort=False)
-
-        # TODO(ExtensionIndex): remove coercion to object
-        # we don't have an easy way to represent an EA as an Index object
-        index = pd.Index(index, name="B", dtype=object)
-        expected = pd.Series([1, 3, 4], index=index, name="A")
-        self.assert_series_equal(result, expected)
+    pass
 
 
 class TestNumericReduce(base.BaseNumericReduceTests):

From 3037ccd7f6ae82a1fe9308759d1ec505f45c8940 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 12 Oct 2018 11:48:55 +0200
Subject: [PATCH 21/23] format docstring

---
 pandas/core/arrays/base.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 7e2e0da199a01..ef7e25033f24e 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -680,19 +680,20 @@ def _ndarray_values(self):
         return np.array(self)
 
     def _reduce(self, name, skipna=True, **kwargs):
-        """Return a scalar result of performing the reduction operation.
+        """
+        Return a scalar result of performing the reduction operation.
 
         Parameters
         ----------
         name : str
-            name of the function, support values are:
-            {any, all, min, max, sum, mean, median, prod,
-             std, var, sem, kurt, skew}
+            Name of the function, supported values are:
+            { any, all, min, max, sum, mean, median, prod,
+            std, var, sem, kurt, skew }.
         skipna : bool, default True
-            if True, skip NaN values
-        kwargs : dict
-            Additional keyword arguments passed to the reduction function
-            ddof is the only supported kwarg
+            If True, skip NaN values.
+        **kwargs
+            Additional keyword arguments passed to the reduction function.
+            Currently, `ddof` is the only supported kwarg.
 
         Returns
         -------

From b8726078a1a8968921c482d1d7e9f5ccd6578945 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 12 Oct 2018 11:53:19 +0200
Subject: [PATCH 22/23] add test ensuring mean always gives float

---
 pandas/tests/arrays/test_integer.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
index fdf4bc91f9f77..9102a5210d54b 100644
--- a/pandas/tests/arrays/test_integer.py
+++ b/pandas/tests/arrays/test_integer.py
@@ -617,6 +617,29 @@ def test_preserve_dtypes(op):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize('op', ['mean'])
+def test_reduce_to_float(op):
+    # some reduce ops always return float, even if the result
+    # is a rounded number
+    df = pd.DataFrame({
+        "A": ['a', 'b', 'b'],
+        "B": [1, None, 3],
+        "C": integer_array([1, None, 3], dtype='Int64'),
+    })
+
+    # op
+    result = getattr(df.C, op)()
+    assert isinstance(result, float)
+
+    # groupby
+    result = getattr(df.groupby("A"), op)()
+    expected = pd.DataFrame({
+        "B": np.array([1.0, 3.0]),
+        "C": np.array([1, 3], dtype="float64")
+    }, index=pd.Index(['a', 'b'], name='A'))
+    tm.assert_frame_equal(result, expected)
+
+
 def test_astype_nansafe():
     # https://github.com/pandas-dev/pandas/pull/22343
     arr = integer_array([np.nan, 1, 2], dtype="Int8")

From aeaf5f32c0939d6bedc99240b47dd54252dda96a Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 12 Oct 2018 13:40:11 +0200
Subject: [PATCH 23/23] add prod to list of type preserving reductions

---
 pandas/core/arrays/integer.py       | 2 +-
 pandas/tests/arrays/test_integer.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 6f37526bbf49e..9917045f2f7d2 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -548,7 +548,7 @@ def _reduce(self, name, skipna=True, **kwargs):
 
         # if we have a preservable numeric op,
         # provide coercion back to an integer type if possible
-        elif name in ['sum', 'min', 'max'] and notna(result):
+        elif name in ['sum', 'min', 'max', 'prod'] and notna(result):
             int_result = int(result)
             if int_result == result:
                 result = int_result
diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
index 9102a5210d54b..23ee8d217bd59 100644
--- a/pandas/tests/arrays/test_integer.py
+++ b/pandas/tests/arrays/test_integer.py
@@ -593,7 +593,7 @@ def test_cross_type_arithmetic():
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.parametrize('op', ['sum', 'min', 'max'])
+@pytest.mark.parametrize('op', ['sum', 'min', 'max', 'prod'])
 def test_preserve_dtypes(op):
     # TODO(#22346): preserve Int64 dtype
     # for ops that enable (mean would actually work here