Skip to content

Commit afde64d

Browse files
committed
Merge branch 'ea-is-numeric' into ea-sparse-2
2 parents 88b73c3 + 50de326 commit afde64d

File tree

8 files changed

+51
-1
lines changed

8 files changed

+51
-1
lines changed

doc/source/whatsnew/v0.24.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,7 @@ ExtensionType Changes
458458
- ``ExtensionArray`` has gained the abstract method ``.dropna()`` (:issue:`21185`)
459459
- ``ExtensionDtype`` has gained the ability to instantiate from string dtypes, e.g. ``decimal`` would instantiate a registered ``DecimalDtype``; furthermore
460460
the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`)
461+
- Added ``ExtensionDtype._is_numeric`` for controlling whether an extension dtype is considered numeric (:issue:`22290`).
461462
- The ``ExtensionArray`` constructor, ``_from_sequence``, now takes the keyword arg ``copy=False`` (:issue:`21185`)
462463
- Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`)
463464
- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)

pandas/core/arrays/integer.py

+4
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,10 @@ def is_signed_integer(self):
4646
def is_unsigned_integer(self):
4747
return self.kind == 'u'
4848

49+
@property
50+
def _is_numeric(self):
51+
return True
52+
4953
@cache_readonly
5054
def numpy_dtype(self):
5155
""" Return an instance of our numpy dtype """

pandas/core/dtypes/base.py

+6
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ def is_dtype(cls, dtype):
9696

9797
@property
9898
def _is_numeric(self):
99+
# type: () -> bool
99100
"""
100101
Whether columns with this dtype should be considered numeric.
101102
@@ -120,8 +121,13 @@ class ExtensionDtype(_DtypeOpsMixin):
120121
* name
121122
* construct_from_string
122123
128+
The following attributes influence the behavior of the dtype in
129+
pandas operations
125131
126132
* _is_numeric
127133

pandas/core/internals/blocks.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -676,7 +676,9 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
676676
newb = self.copy() if copy else self
677677

678678
if newb.is_numeric and self.is_numeric:
679-
if newb.shape != self.shape:
679+
# use values.shape, rather than newb.shape, as newb.shape
680+
# may be incorrect for ExtensionBlocks.
681+
if values.shape != self.shape:
680682
raise TypeError(
681683
"cannot set astype for copy = [{copy}] for dtype "
682684
"({dtype} [{itemsize}]) with smaller itemsize than "

pandas/tests/extension/base/groupby.py

+13
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,16 @@ def test_groupby_extension_apply(self, data_for_grouping, op):
6767
df.groupby("B").A.apply(op)
6868
df.groupby("A").apply(op)
6969
df.groupby("A").B.apply(op)
70+
71+
def test_in_numeric_groupby(self, data_for_grouping):
72+
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4],
73+
"B": data_for_grouping,
74+
"C": [1, 1, 1, 1, 1, 1, 1, 1]})
75+
result = df.groupby("A").sum().columns
76+
77+
if data_for_grouping.dtype._is_numeric:
78+
expected = pd.Index(['B', 'C'])
79+
else:
80+
expected = pd.Index(['C'])
81+
82+
tm.assert_index_equal(result, expected)

pandas/tests/extension/base/interface.py

+4
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,7 @@ def test_no_values_attribute(self, data):
6767
# code, disallowing this for now until solved
6868
assert not hasattr(data, 'values')
6969
assert not hasattr(data, '_values')
70+
71+
def test_is_numeric_honored(self, data):
72+
result = pd.Series(data)
73+
assert result._data.blocks[0].is_numeric is data.dtype._is_numeric

pandas/tests/extension/decimal/array.py

+4
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,10 @@ def construct_from_string(cls, string):
4444
raise TypeError("Cannot construct a '{}' from "
4545
"'{}'".format(cls, string))
4646

47+
@property
48+
def _is_numeric(self):
49+
return True
50+
4751

4852
class DecimalArray(ExtensionArray, ExtensionScalarOpsMixin):
4953

pandas/tests/extension/integer/test_integer.py

+16
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,22 @@ def test_cross_type_arithmetic():
704704
tm.assert_series_equal(result, expected)
705705

706706

707+
def test_groupby_mean_included():
708+
df = pd.DataFrame({
709+
"A": ['a', 'b', 'b'],
710+
"B": [1, None, 3],
711+
"C": IntegerArray([1, None, 3], dtype='Int64'),
712+
})
713+
714+
result = df.groupby("A").sum()
715+
# TODO(#22346): preserve Int64 dtype
716+
expected = pd.DataFrame({
717+
"B": np.array([1.0, 3.0]),
718+
"C": np.array([1, 3], dtype="int64")
719+
}, index=pd.Index(['a', 'b'], name='A'))
720+
tm.assert_frame_equal(result, expected)
721+
722+
707723
# TODO(jreback) - these need testing / are broken
708724

709725
# shift

0 commit comments

Comments
 (0)