From cdd78dbe4172e452874ac8b98593aea19bc7b877 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 24 May 2019 14:44:27 -0400 Subject: [PATCH 01/11] BUG: preserve categorical & sparse types when grouping / pivot closes #18502 --- doc/source/whatsnew/v0.25.0.rst | 29 +++++++++++ pandas/core/groupby/generic.py | 11 +++- pandas/core/groupby/groupby.py | 42 ++++++++++++---- pandas/core/groupby/ops.py | 6 +-- pandas/core/indexing.py | 2 +- pandas/core/internals/blocks.py | 24 ++++++++- pandas/core/internals/construction.py | 5 +- pandas/core/nanops.py | 9 ++-- pandas/tests/groupby/test_function.py | 53 ++++++++++---------- pandas/tests/groupby/test_nth.py | 19 ++++--- pandas/tests/resample/test_datetime_index.py | 6 +++ pandas/tests/sparse/test_groupby.py | 10 ++-- 12 files changed, 154 insertions(+), 62 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 1980e00f1073d..0449f37d3ac28 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -322,6 +322,35 @@ of ``object`` dtype. :attr:`Series.str` will now infer the dtype data *within* t s s.str.startswith(b'a') +.. _whatsnew_0250.api_breaking.groupby_categorical: + +Categorical dtypes are preserved during groupby +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Previously, columns that were categorical, but not the groupby key(s) would be converted to ``object`` dtype during groupby operations. Pandas now will preserve these dtypes. (:issue:`18502`) + +.. ipython:: python + + df = pd.DataFrame( + {'payload': [-1, -2, -1, -2], + 'col': pd.Categorical(["foo", "bar", "bar", "qux"], ordered=True)}) + df + df.dtypes + +*Previous Behavior*: + +.. code-block:: python + + In [5]: df.groupby('payload').first().col.dtype + Out[5]: dtype('O') + +*New Behavior*: + +.. ipython:: python + + df.groupby('payload').first().col.dtype + + .. _whatsnew_0250.api_breaking.incompatible_index_unions: Incompatible Index type unions diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a10920b7a5afb..f1cc54d5a460f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -158,12 +158,19 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, obj = self.obj[data.items[locs]] s = groupby(obj, self.grouper) - result = s.aggregate(lambda x: alt(x, axis=self.axis)) + try: + result = s.aggregate(lambda x: alt(x, axis=self.axis)) + except Exception: + # we may have an exception in trying to aggregate + # continue and exclude the block + pass finally: + dtype = block.values.dtype + # see if we can cast the block back to the original dtype - result = block._try_coerce_and_cast_result(result) + result = block._try_coerce_and_cast_result(result, dtype=dtype) newb = block.make_block(result) new_items.append(locs) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 64cacd60da30f..202d4fb15f971 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -786,6 +786,8 @@ def _try_cast(self, result, obj, numeric_only=False): elif is_extension_array_dtype(dtype): # The function can return something of any type, so check # if the type is compatible with the calling EA. 
+ + # return the same type (Series) as our caller try: result = obj._values._from_sequence(result, dtype=dtype) except Exception: @@ -1157,7 +1159,8 @@ def mean(self, *args, **kwargs): """ nv.validate_groupby_func('mean', args, kwargs, ['numeric_only']) try: - return self._cython_agg_general('mean', **kwargs) + return self._cython_agg_general( + 'mean', alt=lambda x, axis: Series(x).mean(**kwargs), **kwargs) except GroupByError: raise except Exception: # pragma: no cover @@ -1179,7 +1182,11 @@ def median(self, **kwargs): Median of values within each group. """ try: - return self._cython_agg_general('median', **kwargs) + return self._cython_agg_general( + 'median', + alt=lambda x, + axis: Series(x).median(axis=axis, **kwargs), + **kwargs) except GroupByError: raise except Exception: # pragma: no cover @@ -1235,7 +1242,10 @@ def var(self, ddof=1, *args, **kwargs): nv.validate_groupby_func('var', args, kwargs) if ddof == 1: try: - return self._cython_agg_general('var', **kwargs) + return self._cython_agg_general( + 'var', + alt=lambda x, axis: Series(x).var(ddof=ddof, **kwargs), + **kwargs) except Exception: f = lambda x: x.var(ddof=ddof, **kwargs) with _group_selection_context(self): @@ -1263,7 +1273,6 @@ def sem(self, ddof=1): Series or DataFrame Standard error of the mean of values within each group. """ - return self.std(ddof=ddof) / np.sqrt(self.count()) @Substitution(name='groupby') @@ -1290,7 +1299,7 @@ def _add_numeric_operations(cls): """ def groupby_function(name, alias, npfunc, - numeric_only=True, _convert=False, + numeric_only=True, min_count=-1): _local_template = """ @@ -1312,17 +1321,30 @@ def f(self, **kwargs): kwargs['min_count'] = min_count self._set_group_selection() + + # try a cython aggregation if we can try: return self._cython_agg_general( alias, alt=npfunc, **kwargs) except AssertionError as e: raise SpecificationError(str(e)) except Exception: - result = self.aggregate( - lambda x: npfunc(x, axis=self.axis)) - if _convert: - result = result._convert(datetime=True) - return result + pass + + # apply a non-cython aggregation + result = self.aggregate( + lambda x: npfunc(x, axis=self.axis)) + + # coerce the resulting columns if we can + if isinstance(result, DataFrame): + for col in result.columns: + result[col] = self._try_cast( + result[col], self.obj[col]) + else: + result = self._try_cast( + result, self.obj) + + return result set_function_name(f, name, cls) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 010047a8be4ed..38478be5a8e07 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -19,7 +19,7 @@ from pandas.core.dtypes.common import ( ensure_float64, ensure_int64, ensure_int_or_float, ensure_object, ensure_platform_int, is_bool_dtype, is_categorical_dtype, is_complex_dtype, - is_datetime64_any_dtype, is_integer_dtype, is_numeric_dtype, + is_datetime64_any_dtype, is_integer_dtype, is_numeric_dtype, is_sparse, is_timedelta64_dtype, needs_i8_conversion) from pandas.core.dtypes.missing import _maybe_fill, isna @@ -451,9 +451,9 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, # categoricals are only 1d, so we # are not setup for dim transforming - if is_categorical_dtype(values): + if is_categorical_dtype(values) or is_sparse(values): raise NotImplementedError( - "categoricals are not support in cython ops ATM") + "{} are not support in cython ops".format(values.dtype)) elif is_datetime64_any_dtype(values): if how in ['add', 'prod', 'cumsum', 'cumprod']: raise NotImplementedError( diff --git 
a/pandas/core/indexing.py b/pandas/core/indexing.py index 1539feb2e0856..6a1e09f6bb303 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -10,7 +10,7 @@ from pandas.core.dtypes.common import ( ensure_platform_int, is_float, is_integer, is_integer_dtype, is_iterator, - is_list_like, is_numeric_dtype, is_scalar, is_sequence, is_sparse) + is_list_like, is_numeric_dtype, is_scalar, is_sequence) from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.dtypes.missing import _infer_fill_value, isna diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index db0eb44eabbfe..bb06bbbf6011d 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -594,7 +594,8 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, values = self.get_values(dtype=dtype) # _astype_nansafe works fine with 1-d only - values = astype_nansafe(values.ravel(), dtype, copy=True) + values = astype_nansafe( + values.ravel(), dtype, copy=True, **kwargs) # TODO(extension) # should we make this attribute? @@ -1746,6 +1747,27 @@ def _slice(self, slicer): return self.values[slicer] + def _try_cast_result(self, result, dtype=None): + """ + if we have an operation that operates on for example floats + we want to try to cast back to our EA here if possible + + result could be a 2-D numpy array, e.g. the result of + a numeric operation; but it must be shape (1, X) because + we by-definition operate on the ExtensionBlocks one-by-one + + result could also be an EA Array itself, in which case it + is already a 1-D array + """ + try: + + result = self._holder._from_sequence( + np.asarray(result).ravel(), dtype=dtype) + except Exception: + pass + + return result + def formatting_values(self): # Deprecating the ability to override _formatting_values. 
# Do the warning here, it's only user in pandas, since we diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 96b4ab7f3fbc6..0806e6e927e8d 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -687,7 +687,10 @@ def sanitize_array(data, index, dtype=None, copy=False, data = np.array(data, dtype=dtype, copy=False) subarr = np.array(data, dtype=object, copy=copy) - if is_object_dtype(subarr.dtype) and dtype != 'object': + if (not (is_extension_array_dtype(subarr.dtype) or + is_extension_array_dtype(dtype)) and + is_object_dtype(subarr.dtype) and + not is_object_dtype(dtype)): inferred = lib.infer_dtype(subarr, skipna=False) if inferred == 'period': try: diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 7923e463c7719..24a28bf0005cb 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -72,11 +72,12 @@ def _f(*args, **kwargs): class bottleneck_switch: - def __init__(self, **kwargs): + def __init__(self, name=None, **kwargs): + self.name = name self.kwargs = kwargs def __call__(self, alt): - bn_name = alt.__name__ + bn_name = self.name or alt.__name__ try: bn_func = getattr(bn, bn_name) @@ -804,7 +805,8 @@ def nansem(values, axis=None, skipna=True, ddof=1, mask=None): def _nanminmax(meth, fill_value_typ): - @bottleneck_switch() + + @bottleneck_switch(name='nan' + meth) def reduction(values, axis=None, skipna=True, mask=None): values, mask, dtype, dtype_max, fill_value = _get_values( @@ -824,7 +826,6 @@ def reduction(values, axis=None, skipna=True, mask=None): result = _wrap_results(result, dtype, fill_value) return _maybe_null_out(result, axis, mask, values.shape) - reduction.__name__ = 'nan' + meth return reduction diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 14f27f0c4c7d8..e4303c0a07076 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -12,7 +12,7 @@ from pandas import ( DataFrame, Index, MultiIndex, Series, Timestamp, date_range, isna) import pandas.core.nanops as nanops -from pandas.util import testing as tm +from pandas.util import _test_decorators as td, testing as tm @pytest.mark.parametrize("agg_func", ['any', 'all']) @@ -144,6 +144,7 @@ def test_arg_passthru(): index=Index([1, 2], name='group'), columns=['int', 'float', 'category_int', 'datetime', 'datetimetz', 'timedelta']) + for attr in ['mean', 'median']: f = getattr(df.groupby('group'), attr) result = f() @@ -459,35 +460,33 @@ def test_groupby_cumprod(): tm.assert_series_equal(actual, expected) -def test_ops_general(): - ops = [('mean', np.mean), - ('median', np.median), - ('std', np.std), - ('var', np.var), - ('sum', np.sum), - ('prod', np.prod), - ('min', np.min), - ('max', np.max), - ('first', lambda x: x.iloc[0]), - ('last', lambda x: x.iloc[-1]), - ('count', np.size), ] - try: - from scipy.stats import sem - except ImportError: - pass - else: - ops.append(('sem', sem)) +def scipy_sem(*args, **kwargs): + from scipy.stats import sem + return sem(*args, ddof=1, **kwargs) + + +@pytest.mark.parametrize( + 'op,targop', + [('mean', np.mean), + ('median', np.median), + ('std', np.std), + ('var', np.var), + ('sum', np.sum), + ('prod', np.prod), + ('min', np.min), + ('max', np.max), + ('first', lambda x: x.iloc[0]), + ('last', lambda x: x.iloc[-1]), + ('count', np.size), + pytest.param( + 'sem', scipy_sem, marks=td.skip_if_no_scipy)]) +def test_ops_general(op, targop): df = DataFrame(np.random.randn(1000)) labels = 
np.random.randint(0, 50, size=1000).astype(float) - for op, targop in ops: - result = getattr(df.groupby(labels), op)().astype(float) - expected = df.groupby(labels).agg(targop) - try: - tm.assert_frame_equal(result, expected) - except BaseException as exc: - exc.args += ('operation: %s' % op, ) - raise + result = getattr(df.groupby(labels), op)().astype(float) + expected = df.groupby(labels).agg(targop) + tm.assert_frame_equal(result, expected) def test_max_nan_bug(): diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 6a08a8d79b63e..b174fb0e0b6f9 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -282,18 +282,21 @@ def test_first_last_tz(data, expected_first, expected_last): ]) def test_first_last_tz_multi_column(method, ts, alpha): # GH 21603 + category_string = pd.Series(list('abc')).astype( + 'category') df = pd.DataFrame({'group': [1, 1, 2], - 'category_string': pd.Series(list('abc')).astype( - 'category'), + 'category_string': category_string, 'datetimetz': pd.date_range('20130101', periods=3, tz='US/Eastern')}) result = getattr(df.groupby('group'), method)() - expepcted = pd.DataFrame({'category_string': [alpha, 'c'], - 'datetimetz': [ts, - Timestamp('2013-01-03', - tz='US/Eastern')]}, - index=pd.Index([1, 2], name='group')) - assert_frame_equal(result, expepcted) + expected = pd.DataFrame( + {'category_string': pd.Categorical( + [alpha, 'c'], dtype=category_string.dtype), + 'datetimetz': [ts, + Timestamp('2013-01-03', + tz='US/Eastern')]}, + index=pd.Index([1, 2], name='group')) + assert_frame_equal(result, expected) def test_nth_multi_index_as_expected(): diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 5711174ef0c9f..830ba6062cc72 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -112,6 +112,12 @@ def test_resample_integerarray(): dtype="Int64") assert_series_equal(result, expected) + result = ts.resample('3T').mean() + expected = Series([1, 4, 7], + index=pd.date_range('1/1/2000', periods=3, freq='3T'), + dtype='Int64') + assert_series_equal(result, expected) + def test_resample_basic_grouper(series): s = series diff --git a/pandas/tests/sparse/test_groupby.py b/pandas/tests/sparse/test_groupby.py index 531a4360c78a2..bf6055bc12725 100644 --- a/pandas/tests/sparse/test_groupby.py +++ b/pandas/tests/sparse/test_groupby.py @@ -29,11 +29,10 @@ def test_first_last_nth(self): sparse_grouped_last = sparse_grouped.last() sparse_grouped_nth = sparse_grouped.nth(1) - dense_grouped_first = dense_grouped.first().to_sparse() - dense_grouped_last = dense_grouped.last().to_sparse() - dense_grouped_nth = dense_grouped.nth(1).to_sparse() + dense_grouped_first = pd.DataFrame(dense_grouped.first().to_sparse()) + dense_grouped_last = pd.DataFrame(dense_grouped.last().to_sparse()) + dense_grouped_nth = pd.DataFrame(dense_grouped.nth(1).to_sparse()) - # TODO: shouldn't these all be spares or not? 
tm.assert_frame_equal(sparse_grouped_first, dense_grouped_first) tm.assert_frame_equal(sparse_grouped_last, @@ -69,5 +68,6 @@ def test_groupby_includes_fill_value(fill_value): 'b': [fill_value, 1, fill_value, fill_value]}) sdf = df.to_sparse(fill_value=fill_value) result = sdf.groupby('a').sum() - expected = df.groupby('a').sum().to_sparse(fill_value=fill_value) + expected = pd.DataFrame(df.groupby('a').sum().to_sparse( + fill_value=fill_value)) tm.assert_frame_equal(result, expected, check_index_type=False) From 6751d0b4d59a786b62045c3914ff9f462547d891 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 26 Jun 2019 21:46:04 -0500 Subject: [PATCH 02/11] typo --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6a1e09f6bb303..1539feb2e0856 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -10,7 +10,7 @@ from pandas.core.dtypes.common import ( ensure_platform_int, is_float, is_integer, is_integer_dtype, is_iterator, - is_list_like, is_numeric_dtype, is_scalar, is_sequence) + is_list_like, is_numeric_dtype, is_scalar, is_sequence, is_sparse) from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.dtypes.missing import _infer_fill_value, isna From b8be789a2818db9fd175470505ba15566625d38a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 26 Jun 2019 22:18:57 -0500 Subject: [PATCH 03/11] moar tests --- pandas/tests/groupby/test_categorical.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index f24fa0daa5b18..58a43dc218d33 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -697,6 +697,27 @@ def test_preserve_categorical_dtype(): tm.assert_frame_equal(result2, expected) +@pytest.mark.parametrize( + 'func, values', + [('first', ['second', 'first']), + ('last', ['fourth', 'third']), + ('min', ['fourth', 'first']), + ('max', ['second', 'third'])]) +def test_preserve_on_ordered_ops(func, values): + # gh-18502 + # preserve the categoricals on ops + c = pd.Categorical(['first', 'second', 'third', 'fourth'], ordered=True) + df = pd.DataFrame( + {'payload': [-1, -2, -1, -2], + 'col': c}) + g = df.groupby('payload') + result = getattr(g, func)() + expected = pd.DataFrame( + {'payload': [-2, -1], + 'col': pd.Series(values, dtype=c.dtype)}).set_index('payload') + tm.assert_frame_equal(result, expected) + + def test_categorical_no_compress(): data = Series(np.random.randn(9)) From 31d4635f44148aeb69663306454efd95306f198a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 26 Jun 2019 22:44:23 -0500 Subject: [PATCH 04/11] use a fixed random seed --- pandas/tests/sparse/test_pivot.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py index 114e7b4bacd94..4b77e22024280 100644 --- a/pandas/tests/sparse/test_pivot.py +++ b/pandas/tests/sparse/test_pivot.py @@ -11,12 +11,13 @@ class TestPivotTable: def setup_method(self, method): + rs = np.random.RandomState(0) self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'], 'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'], - 'C': np.random.randn(8), - 'D': np.random.randn(8), + 'C': rs.randn(8), + 'D': rs.randn(8), 'E': [np.nan, np.nan, 1, 2, np.nan, 1, np.nan, np.nan]}) self.sparse = self.dense.to_sparse() From 
ea98679a619ab0dfbfd29b7faaae91ac5b26e8da Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 27 Jun 2019 09:39:37 -0500 Subject: [PATCH 05/11] xfail on np 1.17 --- pandas/tests/sparse/test_pivot.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py index 4b77e22024280..2b6d2a4e63a0c 100644 --- a/pandas/tests/sparse/test_pivot.py +++ b/pandas/tests/sparse/test_pivot.py @@ -3,6 +3,7 @@ import pandas as pd import pandas.util.testing as tm +from pandas import _np_version_under1p17 @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning") @@ -48,6 +49,8 @@ def test_pivot_table(self): # values='E', aggfunc='sum') # tm.assert_frame_equal(res_sparse, res_dense) + @pytest.mark.xfail(not _np_version_under1p17, + reason="failing occasionally on numpy > 1.17") def test_pivot_table_multi(self): res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', values=['D', 'E']) From 7ab00fa071c10654aabba6a4158d6c6685eb1713 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 27 Jun 2019 10:02:45 -0500 Subject: [PATCH 06/11] lint --- pandas/tests/sparse/test_pivot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py index 2b6d2a4e63a0c..da5251ff4cbe0 100644 --- a/pandas/tests/sparse/test_pivot.py +++ b/pandas/tests/sparse/test_pivot.py @@ -2,8 +2,8 @@ import pytest import pandas as pd -import pandas.util.testing as tm from pandas import _np_version_under1p17 +import pandas.util.testing as tm @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning") From bad7553d315acdfd1b6fbf598eec574102033af6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 27 Jun 2019 11:26:01 -0500 Subject: [PATCH 07/11] groupby tests --- pandas/core/internals/blocks.py | 2 +- pandas/tests/extension/base/groupby.py | 12 ++++++++++++ pandas/tests/extension/decimal/test_decimal.py | 6 +++++- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index bb06bbbf6011d..652f70746f618 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1762,7 +1762,7 @@ def _try_cast_result(self, result, dtype=None): try: result = self._holder._from_sequence( - np.asarray(result).ravel(), dtype=dtype) + result.ravel(), dtype=dtype) except Exception: pass diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index 1929dad075695..daeec5923888c 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -64,6 +64,18 @@ def test_groupby_extension_apply( df.groupby("A").apply(groupby_apply_op) df.groupby("A").B.apply(groupby_apply_op) + def test_groupby_apply_identity(self, data_for_grouping): + df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], + "B": data_for_grouping}) + result = df.groupby('A').B.apply(lambda x: x.array) + expected = pd.Series([df.B.iloc[[0, 1, 6]].array, + df.B.iloc[[2, 3]].array, + df.B.iloc[[4, 5]].array, + df.B.iloc[[7]].array], + index=pd.Index([1, 2, 3, 4], name='A'), + name='B') + self.assert_series_equal(result, expected) + def test_in_numeric_groupby(self, data_for_grouping): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping, diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 94c0b61c6382a..fbea5c80a6e12 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ 
b/pandas/tests/extension/decimal/test_decimal.py @@ -192,7 +192,11 @@ class TestCasting(BaseDecimal, base.BaseCastingTests): class TestGroupby(BaseDecimal, base.BaseGroupbyTests): - pass + + @pytest.mark.xfail( + reason="needs to correctly define __eq__ to handle nans, xref #27081.") + def test_groupby_apply_identity(self, data_for_grouping): + super().test_groupby_apply_idendeity(data_for_grouping) class TestSetitem(BaseDecimal, base.BaseSetitemTests): From 41e11e130347466f7d0c9d4dd5aba390282b85bb Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 27 Jun 2019 13:24:07 -0500 Subject: [PATCH 08/11] use strict=False --- pandas/tests/sparse/test_pivot.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py index da5251ff4cbe0..1841571eb61c8 100644 --- a/pandas/tests/sparse/test_pivot.py +++ b/pandas/tests/sparse/test_pivot.py @@ -50,7 +50,8 @@ def test_pivot_table(self): # tm.assert_frame_equal(res_sparse, res_dense) @pytest.mark.xfail(not _np_version_under1p17, - reason="failing occasionally on numpy > 1.17") + reason="failing occasionally on numpy > 1.17", + strict=False) def test_pivot_table_multi(self): res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', values=['D', 'E']) From ccfcca01cc9b87c19bcb821803ad02ed126cfe4a Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 27 Jun 2019 15:09:33 -0500 Subject: [PATCH 09/11] review comments --- pandas/core/groupby/generic.py | 2 +- pandas/tests/sparse/test_pivot.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index f1cc54d5a460f..7c8c7956f8cb4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -160,7 +160,7 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, s = groupby(obj, self.grouper) try: result = s.aggregate(lambda x: alt(x, axis=self.axis)) - except Exception: + except TypeError: # we may have an exception in trying to aggregate # continue and exclude the block pass diff --git a/pandas/tests/sparse/test_pivot.py b/pandas/tests/sparse/test_pivot.py index 1841571eb61c8..8f98117f20208 100644 --- a/pandas/tests/sparse/test_pivot.py +++ b/pandas/tests/sparse/test_pivot.py @@ -42,12 +42,12 @@ def test_pivot_table(self): values='E', aggfunc='mean') tm.assert_frame_equal(res_sparse, res_dense) - # ToDo: sum doesn't handle nan properly - # res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', - # values='E', aggfunc='sum') - # res_dense = pd.pivot_table(self.dense, index='A', columns='B', - # values='E', aggfunc='sum') - # tm.assert_frame_equal(res_sparse, res_dense) + def test_pivot_table_with_nans(self): + res_sparse = pd.pivot_table(self.sparse, index='A', columns='B', + values='E', aggfunc='sum') + res_dense = pd.pivot_table(self.dense, index='A', columns='B', + values='E', aggfunc='sum') + tm.assert_frame_equal(res_sparse, res_dense) @pytest.mark.xfail(not _np_version_under1p17, reason="failing occasionally on numpy > 1.17", From 48e7c32e6b4c39103bf9cc0cb3374d9363a27778 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 27 Jun 2019 16:49:08 -0500 Subject: [PATCH 10/11] typo --- pandas/tests/extension/decimal/test_decimal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index fbea5c80a6e12..ecef835a9c797 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ 
b/pandas/tests/extension/decimal/test_decimal.py @@ -196,7 +196,7 @@ class TestGroupby(BaseDecimal, base.BaseGroupbyTests): @pytest.mark.xfail( reason="needs to correctly define __eq__ to handle nans, xref #27081.") def test_groupby_apply_identity(self, data_for_grouping): - super().test_groupby_apply_idendeity(data_for_grouping) + super().test_groupby_apply_identity(data_for_grouping) class TestSetitem(BaseDecimal, base.BaseSetitemTests): From 3a6a0c08dc5a65427f29fbf4c989b2b895b004aa Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 27 Jun 2019 18:12:26 -0500 Subject: [PATCH 11/11] fix doc warning on master --- doc/source/whatsnew/v0.25.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 0449f37d3ac28..8f677b1f7dc76 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -838,7 +838,7 @@ ExtensionArray - Bug in :func:`factorize` when passing an ``ExtensionArray`` with a custom ``na_sentinel`` (:issue:`25696`). - :meth:`Series.count` miscounts NA values in ExtensionArrays (:issue:`26835`) -- Keyword argument ``deep`` has been removed from :method:`ExtensionArray.copy` (:issue:`27083`) +- Keyword argument ``deep`` has been removed from :meth:`ExtensionArray.copy` (:issue:`27083`) Other ^^^^^
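
The short sketch below is not part of the patch series above; it only illustrates the behavioural change these commits describe, mirroring the whatsnew example and the new ``test_preserve_on_ordered_ops`` test. The frame and column names (``df``, ``payload``, ``col``) are taken from the whatsnew entry, and the assertions assume a pandas build that includes these changes (0.25+); on earlier versions the dtype check fails because the non-key categorical column comes back as ``object``, which is the regression :issue:`18502` tracks.

.. code-block:: python

    # Illustrative only -- mirrors the whatsnew example and
    # test_preserve_on_ordered_ops; assumes pandas >= 0.25.
    import pandas as pd

    c = pd.Categorical(["first", "second", "third", "fourth"], ordered=True)
    df = pd.DataFrame({"payload": [-1, -2, -1, -2], "col": c})

    result = df.groupby("payload").first()

    # Before this patch series the aggregated column was object dtype;
    # now the (ordered) categorical dtype of the non-key column survives.
    assert isinstance(result["col"].dtype, pd.CategoricalDtype)
    assert result["col"].dtype == c.dtype  # same categories, same ordering
    print(result["col"].dtype)             # category

The same preservation applies to the other aggregations exercised in the tests (``last``, ``min``, ``max``), and the sparse/``Int64`` resample tests in the series check the analogous extension-dtype round-trips.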