Skip to content

Commit 8f3afd7

Browse files
authored
BUG: Make nullable booleans numeric (#34056)
1 parent 08a0cb0 commit 8f3afd7

File tree

5 files changed

+31
-7
lines changed

5 files changed

+31
-7
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -847,6 +847,7 @@ ExtensionArray
847847
- Fixed bug that caused :meth:`Series.__repr__()` to crash for extension types whose elements are multidimensional arrays (:issue:`33770`).
848848
- Fixed bug where :meth:`Series.update` would raise a ``ValueError`` for ``ExtensionArray`` dtypes with missing values (:issue:`33980`)
849849
- Fixed bug where :meth:`StringArray.memory_usage` was not implemented (:issue:`33963`)
850+
- Fixed bug where :class:`DataFrameGroupBy` would ignore the ``min_count`` argument for aggregations on nullable boolean dtypes (:issue:`34051`)
850851
- Fixed bug where ``DataFrame(columns=.., dtype='string')`` would fail (:issue:`27953`, :issue:`33623`)
851852

852853
Other

pandas/core/arrays/boolean.py

+4
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ def __repr__(self) -> str:
9393
def _is_boolean(self) -> bool:
9494
return True
9595

96+
@property
97+
def _is_numeric(self) -> bool:
98+
return True
99+
96100
def __from_arrow__(
97101
self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
98102
) -> "BooleanArray":

pandas/core/dtypes/cast.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -312,12 +312,14 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj:
312312
DtypeObj
313313
The desired dtype of the result.
314314
"""
315-
d = {
316-
(np.dtype(np.bool), "add"): np.dtype(np.int64),
317-
(np.dtype(np.bool), "cumsum"): np.dtype(np.int64),
318-
(np.dtype(np.bool), "sum"): np.dtype(np.int64),
319-
}
320-
return d.get((dtype, how), dtype)
315+
from pandas.core.arrays.boolean import BooleanDtype
316+
from pandas.core.arrays.integer import Int64Dtype
317+
318+
if how in ["add", "cumsum", "sum"] and (dtype == np.dtype(np.bool)):
319+
return np.dtype(np.int64)
320+
elif how in ["add", "cumsum", "sum"] and isinstance(dtype, BooleanDtype):
321+
return Int64Dtype()
322+
return dtype
321323

322324

323325
def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None):

pandas/core/groupby/ops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -485,7 +485,7 @@ def _cython_operation(
485485
values = values.view("int64")
486486
is_numeric = True
487487
elif is_bool_dtype(values.dtype):
488-
values = ensure_float64(values)
488+
values = ensure_int_or_float(values)
489489
elif is_integer_dtype(values):
490490
# we use iNaT for the missing value on ints
491491
# so pre-convert to guard this condition

pandas/tests/extension/test_boolean.py

+17
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,23 @@ def test_in_numeric_groupby(self, data_for_grouping):
326326

327327
tm.assert_index_equal(result, expected)
328328

329+
@pytest.mark.parametrize("min_count", [0, 10])
330+
def test_groupby_sum_mincount(self, data_for_grouping, min_count):
331+
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping})
332+
result = df.groupby("A").sum(min_count=min_count)
333+
if min_count == 0:
334+
expected = pd.DataFrame(
335+
{"B": pd.array([3, 0, 0], dtype="Int64")},
336+
index=pd.Index([1, 2, 3], name="A"),
337+
)
338+
tm.assert_frame_equal(result, expected)
339+
else:
340+
expected = pd.DataFrame(
341+
{"B": pd.array([pd.NA] * 3, dtype="Int64")},
342+
index=pd.Index([1, 2, 3], name="A"),
343+
)
344+
tm.assert_frame_equal(result, expected)
345+
329346

330347
class TestNumericReduce(base.BaseNumericReduceTests):
331348
def check_reduce(self, s, op_name, skipna):

0 commit comments

Comments
 (0)