We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 08a0cb0 commit 8f3afd7Copy full SHA for 8f3afd7
doc/source/whatsnew/v1.1.0.rst
@@ -847,6 +847,7 @@ ExtensionArray
847
- Fixed bug that caused :meth:`Series.__repr__()` to crash for extension types whose elements are multidimensional arrays (:issue:`33770`).
848
- Fixed bug where :meth:`Series.update` would raise a ``ValueError`` for ``ExtensionArray`` dtypes with missing values (:issue:`33980`)
849
- Fixed bug where :meth:`StringArray.memory_usage` was not implemented (:issue:`33963`)
850
+- Fixed bug where :class:`DataFrameGroupBy` would ignore the ``min_count`` argument for aggregations on nullable boolean dtypes (:issue:`34051`)
851
- Fixed bug where ``DataFrame(columns=.., dtype='string')`` would fail (:issue:`27953`, :issue:`33623`)
852
853
Other
pandas/core/arrays/boolean.py
@@ -93,6 +93,10 @@ def __repr__(self) -> str:
93
def _is_boolean(self) -> bool:
94
return True
95
96
+ @property
97
+ def _is_numeric(self) -> bool:
98
+ return True
99
+
100
def __from_arrow__(
101
self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
102
) -> "BooleanArray":
pandas/core/dtypes/cast.py
@@ -312,12 +312,14 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj:
312
DtypeObj
313
The desired dtype of the result.
314
"""
315
- d = {
316
- (np.dtype(np.bool), "add"): np.dtype(np.int64),
317
- (np.dtype(np.bool), "cumsum"): np.dtype(np.int64),
318
- (np.dtype(np.bool), "sum"): np.dtype(np.int64),
319
- }
320
- return d.get((dtype, how), dtype)
+ from pandas.core.arrays.boolean import BooleanDtype
+ from pandas.core.arrays.integer import Int64Dtype
+ if how in ["add", "cumsum", "sum"] and (dtype == np.dtype(np.bool)):
+ return np.dtype(np.int64)
+ elif how in ["add", "cumsum", "sum"] and isinstance(dtype, BooleanDtype):
321
+ return Int64Dtype()
322
+ return dtype
323
324
325
def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None):
pandas/core/groupby/ops.py
@@ -485,7 +485,7 @@ def _cython_operation(
485
values = values.view("int64")
486
is_numeric = True
487
elif is_bool_dtype(values.dtype):
488
- values = ensure_float64(values)
+ values = ensure_int_or_float(values)
489
elif is_integer_dtype(values):
490
# we use iNaT for the missing value on ints
491
# so pre-convert to guard this condition
pandas/tests/extension/test_boolean.py
@@ -326,6 +326,23 @@ def test_in_numeric_groupby(self, data_for_grouping):
326
327
tm.assert_index_equal(result, expected)
328
329
+ @pytest.mark.parametrize("min_count", [0, 10])
330
+ def test_groupby_sum_mincount(self, data_for_grouping, min_count):
331
+ df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping})
332
+ result = df.groupby("A").sum(min_count=min_count)
333
+ if min_count == 0:
334
+ expected = pd.DataFrame(
335
+ {"B": pd.array([3, 0, 0], dtype="Int64")},
336
+ index=pd.Index([1, 2, 3], name="A"),
337
+ )
338
+ tm.assert_frame_equal(result, expected)
339
+ else:
340
341
+ {"B": pd.array([pd.NA] * 3, dtype="Int64")},
342
343
344
345
346
347
class TestNumericReduce(base.BaseNumericReduceTests):
348
def check_reduce(self, s, op_name, skipna):
0 commit comments