From 506421798b0a73b36a246ad7bf9fa8c9564bfb66 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 14 Aug 2018 10:08:00 -0500
Subject: [PATCH 1/6] API: ExtensionDtype._is_numeric

---
 doc/source/whatsnew/v0.24.0.txt                |  1 +
 pandas/core/arrays/integer.py                  |  4 ++++
 pandas/core/dtypes/base.py                     | 17 +++++++++++++++++
 pandas/core/internals/blocks.py                |  8 +++++++-
 pandas/tests/extension/base/groupby.py         | 13 +++++++++++++
 pandas/tests/extension/base/interface.py       |  4 ++++
 pandas/tests/extension/decimal/array.py        |  4 ++++
 pandas/tests/extension/integer/test_integer.py | 15 +++++++++++++++
 8 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index cf12759c051fc..c1765b773b6a1 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -442,6 +442,7 @@ ExtensionType Changes
 - ``ExtensionArray`` has gained the abstract methods ``.dropna()`` (:issue:`21185`)
 - ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore
   the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`)
+- Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`).
 - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`)
 - Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`)
 - :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index c126117060c3d..b818a860f9aa7 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -45,6 +45,10 @@ def is_signed_integer(self):
     def is_unsigned_integer(self):
         return self.kind == 'u'
 
+    @property
+    def _is_numeric(self):
+        return True
+
     @cache_readonly
     def numpy_dtype(self):
         """ Return an instance of our numpy dtype """
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 5f405e0d10657..2c90f0f7882a6 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -94,6 +94,18 @@ def is_dtype(cls, dtype):
         except TypeError:
             return False
 
+    @property
+    def _is_numeric(self):
+        # type: () -> bool
+        """
+        Whether columns with this dtype should be considered numeric.
+
+        By default ExtensionDtypes are assumed to be non-numeric.
+        They'll be excluded from operations that exclude non-numeric
+        columns, like groupby reductions.
+        """
+        return False
+
 
 class ExtensionDtype(_DtypeOpsMixin):
     """A custom data type, to be paired with an ExtensionArray.
@@ -109,6 +121,11 @@ class ExtensionDtype(_DtypeOpsMixin):
     * name
     * construct_from_string
 
+    The following attributes influence the behavior of the dtype in
+    pandas operations:
+
+    * _is_numeric
+
     Optionally one can override construct_array_type for construction
     with the name of this dtype via the Registry
 
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index f0635014b166b..b8f9ab6ee2f60 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -669,7 +669,9 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
             newb = self.copy() if copy else self
 
         if newb.is_numeric and self.is_numeric:
-            if newb.shape != self.shape:
+            # use values.shape, rather than newb.shape, as newb.shape
+            # may be incorrect for ExtensionBlocks.
+            if values.shape != self.shape:
                 raise TypeError(
                     "cannot set astype for copy = [{copy}] for dtype "
                     "({dtype} [{itemsize}]) with smaller itemsize than "
@@ -1947,6 +1949,10 @@ def is_view(self):
         """Extension arrays are never treated as views."""
         return False
 
+    @property
+    def is_numeric(self):
+        return self.values.dtype._is_numeric
+
     def setitem(self, indexer, value, mgr=None):
         """Set the value inplace, returning a same-typed block.
 
diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py
index a29ef2a509a63..174997c7d51e1 100644
--- a/pandas/tests/extension/base/groupby.py
+++ b/pandas/tests/extension/base/groupby.py
@@ -67,3 +67,16 @@ def test_groupby_extension_apply(self, data_for_grouping, op):
         df.groupby("B").A.apply(op)
         df.groupby("A").apply(op)
         df.groupby("A").B.apply(op)
+
+    def test_in_numeric_groupby(self, data_for_grouping):
+        df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
+                           "B": data_for_grouping,
+                           "C": [1, 1, 1, 1, 1, 1, 1, 1]})
+        result = df.groupby("A").sum().columns
+
+        if data_for_grouping.dtype._is_numeric:
+            expected = pd.Index(['B', 'C'])
+        else:
+            expected = pd.Index(['C'])
+
+        tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py
index 69de0e1900831..99c3b92541cbd 100644
--- a/pandas/tests/extension/base/interface.py
+++ b/pandas/tests/extension/base/interface.py
@@ -67,3 +67,7 @@ def test_no_values_attribute(self, data):
         # code, disallowing this for now until solved
         assert not hasattr(data, 'values')
         assert not hasattr(data, '_values')
+
+    def test_is_numeric_honored(self, data):
+        result = pd.Series(data)
+        assert result._data.blocks[0].is_numeric is data.dtype._is_numeric
diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
index 108b8874b3ac5..3d28ab9978f38 100644
--- a/pandas/tests/extension/decimal/array.py
+++ b/pandas/tests/extension/decimal/array.py
@@ -33,6 +33,10 @@ def construct_from_string(cls, string):
             raise TypeError("Cannot construct a '{}' from "
                             "'{}'".format(cls, string))
 
+    @property
+    def _is_numeric(self):
+        return True
+
 
 class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin):
     dtype = DecimalDtype()
diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py
index 5e0f5bf0a5dcf..efc690a487d22 100644
--- a/pandas/tests/extension/integer/test_integer.py
+++ b/pandas/tests/extension/integer/test_integer.py
@@ -697,6 +697,21 @@ def test_cross_type_arithmetic():
     tm.assert_series_equal(result, expected)
 
 
+def test_groupby_mean_included():
+    df = pd.DataFrame({
+        "A": ['a', 'b', 'b'],
+        "B": [1, None, 3],
+        "C": IntegerArray([1, None, 3], dtype='Int64'),
+    })
+
+    result = df.groupby("A").sum()
+    expected = pd.DataFrame({
+        "B": np.array([1.0, 3.0]),
+        "C": IntegerArray([1, 3], dtype="Int64")
+    })
+    tm.assert_frame_equal(result, expected)
+
+
 # TODO(jreback) - these need testing / are broken
 
 # shift

From 50de326a37873d8c6667fd3f33e36cddaa8af9b4 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 14 Aug 2018 11:49:51 -0500
Subject: [PATCH 2/6] TST: fix expected frame in test_groupby_mean_included

---
 pandas/tests/extension/integer/test_integer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/extension/integer/test_integer.py b/pandas/tests/extension/integer/test_integer.py
index efc690a487d22..7b374d8331cae 100644
--- a/pandas/tests/extension/integer/test_integer.py
+++ b/pandas/tests/extension/integer/test_integer.py
@@ -705,10 +705,11 @@ def test_groupby_mean_included():
     })
 
     result = df.groupby("A").sum()
+    # TODO(#22346): preserve Int64 dtype
     expected = pd.DataFrame({
         "B": np.array([1.0, 3.0]),
-        "C": IntegerArray([1, 3], dtype="Int64")
-    })
+        "C": np.array([1, 3], dtype="int64")
+    }, index=pd.Index(['a', 'b'], name='A'))
     tm.assert_frame_equal(result, expected)
 
 

From 1d96d22681abe15fe9666a3f3f7f99824c9de9df Mon Sep 17 00:00:00 2001
From: Jeremy Schendel <jschendel@users.noreply.github.com>
Date: Tue, 14 Aug 2018 23:43:12 -0600
Subject: [PATCH 3/6] added test for DataFrame._get_numeric_data

---
 pandas/tests/frame/test_block_internals.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py
index 8e012922d25f1..d096daaa0b664 100644
--- a/pandas/tests/frame/test_block_internals.py
+++ b/pandas/tests/frame/test_block_internals.py
@@ -11,7 +11,8 @@
 import numpy as np
 
 from pandas import (DataFrame, Series, Timestamp, date_range, compat,
-                    option_context)
+                    option_context, Categorical)
+from pandas.core.arrays import IntegerArray, IntervalArray
 from pandas.compat import StringIO
 import pandas as pd
 
@@ -436,6 +437,17 @@ def test_get_numeric_data(self):
         expected = df
         assert_frame_equal(result, expected)
 
+    def test_get_numeric_data_extension_dtype(self):
+        # GH 22290
+        df = DataFrame({
+            'A': IntegerArray([-10, np.nan, 0, 10, 20, 30], dtype='Int64'),
+            'B': Categorical(list('abcabc')),
+            'C': IntegerArray([0, 1, 2, 3, np.nan, 5], dtype='UInt8'),
+            'D': IntervalArray.from_breaks(range(7))})
+        result = df._get_numeric_data()
+        expected = df.loc[:, ['A', 'C']]
+        assert_frame_equal(result, expected)
+
     def test_convert_objects(self):
 
         oops = self.mixed_frame.T.T

From db9af360aa7ea8ed9ad6911a92de839d269e41be Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 15 Aug 2018 07:37:37 -0500
Subject: [PATCH 4/6] Pass ndim to make_block in _astype

---
 pandas/core/internals/blocks.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index b8f9ab6ee2f60..b568d234b8558 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -662,16 +662,14 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
                     pass
 
             newb = make_block(values, placement=self.mgr_locs,
-                              klass=klass)
+                              klass=klass, ndim=self.ndim)
         except:
             if errors == 'raise':
                 raise
             newb = self.copy() if copy else self
 
         if newb.is_numeric and self.is_numeric:
-            # use values.shape, rather than newb.shape, as newb.shape
-            # may be incorrect for ExtensionBlocks.
-            if values.shape != self.shape:
+            if newb.shape != self.shape:
                 raise TypeError(
                     "cannot set astype for copy = [{copy}] for dtype "
                     "({dtype} [{itemsize}]) with smaller itemsize than "

From a3fdc2ae9baafa7feceeb0cb67066b1a4ed52951 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Wed, 15 Aug 2018 07:40:42 -0500
Subject: [PATCH 5/6] DOC: note plotting in _is_numeric docstring

---
 pandas/core/dtypes/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index 2c90f0f7882a6..c6bdb8656c3c6 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -102,7 +102,7 @@ def _is_numeric(self):
 
         By default ExtensionDtypes are assumed to be non-numeric.
         They'll be excluded from operations that exclude non-numeric
-        columns, like groupby reductions.
+        columns, like groupby reductions, plotting, etc.
         """
         return False
 

From 2779419b7d502d6279aabbd5332e6aabc1d73448 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 20 Aug 2018 13:16:31 +0200
Subject: [PATCH 6/6] DOC: say "(groupby) reductions" in _is_numeric docstring

---
 pandas/core/dtypes/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
index c6bdb8656c3c6..1ecb6234ad2d9 100644
--- a/pandas/core/dtypes/base.py
+++ b/pandas/core/dtypes/base.py
@@ -102,7 +102,7 @@ def _is_numeric(self):
 
         By default ExtensionDtypes are assumed to be non-numeric.
         They'll be excluded from operations that exclude non-numeric
-        columns, like groupby reductions, plotting, etc.
+        columns, like (groupby) reductions, plotting, etc.
         """
         return False