Skip to content

Commit cdcf309

Browse files
Merge fc34131 into 2a2d1cf
2 parents 2a2d1cf + fc34131 commit cdcf309

File tree

9 files changed

+77
-2
lines changed

9 files changed

+77
-2
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,7 @@ ExtensionType Changes
442442
- ``ExtensionArray`` has gained the abstract method ``.dropna()`` (:issue:`21185`)
443443
- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore
444444
the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`)
445+
- Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`).
445446
- The ``ExtensionArray`` constructor, ``_from_sequence``, now takes the keyword arg ``copy=False`` (:issue:`21185`)
446447
- Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`)
447448
- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)

pandas/core/arrays/integer.py

+4
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ def is_signed_integer(self):
4545
def is_unsigned_integer(self):
4646
return self.kind == 'u'
4747

48+
@property
49+
def _is_numeric(self):
50+
return True
51+
4852
@cache_readonly
4953
def numpy_dtype(self):
5054
""" Return an instance of our numpy dtype """

pandas/core/dtypes/base.py

+17
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,18 @@ def is_dtype(cls, dtype):
9494
except TypeError:
9595
return False
9696

97+
@property
98+
def _is_numeric(self):
99+
# type: () -> bool
100+
"""
101+
Whether columns with this dtype should be considered numeric.
102+
103+
By default ExtensionDtypes are assumed to be non-numeric.
104+
They'll be excluded from operations that exclude non-numeric
105+
columns, like groupby reductions, plotting, etc.
106+
"""
107+
return False
108+
97109

98110
class ExtensionDtype(_DtypeOpsMixin):
99111
"""A custom data type, to be paired with an ExtensionArray.
@@ -109,6 +121,11 @@ class ExtensionDtype(_DtypeOpsMixin):
109121
* name
110122
* construct_from_string
111123
124+
The following attributes influence the behavior of the dtype in
125+
pandas operations
126+
127+
* _is_numeric
128+
112129
Optionally one can override construct_array_type for construction
113130
with the name of this dtype via the Registry
114131

pandas/core/internals/blocks.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -662,7 +662,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
662662
pass
663663

664664
newb = make_block(values, placement=self.mgr_locs,
665-
klass=klass)
665+
klass=klass, ndim=self.ndim)
666666
except:
667667
if errors == 'raise':
668668
raise
@@ -1947,6 +1947,10 @@ def is_view(self):
19471947
"""Extension arrays are never treated as views."""
19481948
return False
19491949

1950+
@property
1951+
def is_numeric(self):
1952+
return self.values.dtype._is_numeric
1953+
19501954
def setitem(self, indexer, value, mgr=None):
19511955
"""Set the value inplace, returning a same-typed block.
19521956

pandas/tests/extension/base/groupby.py

+13
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,16 @@ def test_groupby_extension_apply(self, data_for_grouping, op):
6767
df.groupby("B").A.apply(op)
6868
df.groupby("A").apply(op)
6969
df.groupby("A").B.apply(op)
70+
71+
def test_in_numeric_groupby(self, data_for_grouping):
72+
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
73+
"B": data_for_grouping,
74+
"C": [1, 1, 1, 1, 1, 1, 1, 1]})
75+
result = df.groupby("A").sum().columns
76+
77+
if data_for_grouping.dtype._is_numeric:
78+
expected = pd.Index(['B', 'C'])
79+
else:
80+
expected = pd.Index(['C'])
81+
82+
tm.assert_index_equal(result, expected)

pandas/tests/extension/base/interface.py

+4
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,7 @@ def test_no_values_attribute(self, data):
6767
# code, disallowing this for now until solved
6868
assert not hasattr(data, 'values')
6969
assert not hasattr(data, '_values')
70+
71+
def test_is_numeric_honored(self, data):
72+
result = pd.Series(data)
73+
assert result._data.blocks[0].is_numeric is data.dtype._is_numeric

pandas/tests/extension/decimal/array.py

+4
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ def construct_from_string(cls, string):
3333
raise TypeError("Cannot construct a '{}' from "
3434
"'{}'".format(cls, string))
3535

36+
@property
37+
def _is_numeric(self):
38+
return True
39+
3640

3741
class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin):
3842
dtype = DecimalDtype()

pandas/tests/extension/integer/test_integer.py

+16
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,22 @@ def test_cross_type_arithmetic():
697697
tm.assert_series_equal(result, expected)
698698

699699

700+
def test_groupby_mean_included():
701+
df = pd.DataFrame({
702+
"A": ['a', 'b', 'b'],
703+
"B": [1, None, 3],
704+
"C": IntegerArray([1, None, 3], dtype='Int64'),
705+
})
706+
707+
result = df.groupby("A").sum()
708+
# TODO(#22346): preserve Int64 dtype
709+
expected = pd.DataFrame({
710+
"B": np.array([1.0, 3.0]),
711+
"C": np.array([1, 3], dtype="int64")
712+
}, index=pd.Index(['a', 'b'], name='A'))
713+
tm.assert_frame_equal(result, expected)
714+
715+
700716
# TODO(jreback) - these need testing / are broken
701717

702718
# shift

pandas/tests/frame/test_block_internals.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
import numpy as np
1212

1313
from pandas import (DataFrame, Series, Timestamp, date_range, compat,
14-
option_context)
14+
option_context, Categorical)
15+
from pandas.core.arrays import IntegerArray, IntervalArray
1516
from pandas.compat import StringIO
1617
import pandas as pd
1718

@@ -436,6 +437,17 @@ def test_get_numeric_data(self):
436437
expected = df
437438
assert_frame_equal(result, expected)
438439

440+
def test_get_numeric_data_extension_dtype(self):
441+
# GH 22290
442+
df = DataFrame({
443+
'A': IntegerArray([-10, np.nan, 0, 10, 20, 30], dtype='Int64'),
444+
'B': Categorical(list('abcabc')),
445+
'C': IntegerArray([0, 1, 2, 3, np.nan, 5], dtype='UInt8'),
446+
'D': IntervalArray.from_breaks(range(7))})
447+
result = df._get_numeric_data()
448+
expected = df.loc[:, ['A', 'C']]
449+
assert_frame_equal(result, expected)
450+
439451
def test_convert_objects(self):
440452

441453
oops = self.mixed_frame.T.T

0 commit comments

Comments
 (0)