Skip to content

BUG: Make nullable booleans numeric #34056

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 11, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,7 @@ ExtensionArray
- Fixed bug that caused :meth:`Series.__repr__()` to crash for extension types whose elements are multidimensional arrays (:issue:`33770`).
- Fixed bug where :meth:`Series.update` would raise a ``ValueError`` for ``ExtensionArray`` dtypes with missing values (:issue:`33980`)
- Fixed bug where :meth:`StringArray.memory_usage` was not implemented (:issue:`33963`)
- Fixed bug where :class:`DataFrameGroupBy` would ignore the ``min_count`` argument for aggregations on nullable boolean dtypes (:issue:`34051`)
- Fixed bug where ``DataFrame(columns=.., dtype='string')`` would fail (:issue:`27953`, :issue:`33623`)


Expand Down
4 changes: 4 additions & 0 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ def __repr__(self) -> str:
def _is_boolean(self) -> bool:
return True

@property
def _is_numeric(self) -> bool:
    """
    Whether this dtype is flagged as numeric; always ``True``.

    NOTE(review): presumably this lets numeric-only code paths (e.g.
    groupby aggregations) include nullable boolean columns -- confirm
    against the callers that read ``_is_numeric``.
    """
    return True

def __from_arrow__(
self, array: Union["pyarrow.Array", "pyarrow.ChunkedArray"]
) -> "BooleanArray":
Expand Down
14 changes: 8 additions & 6 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,12 +312,14 @@ def maybe_cast_result_dtype(dtype: DtypeObj, how: str) -> DtypeObj:
DtypeObj
The desired dtype of the result.
"""
d = {
(np.dtype(np.bool), "add"): np.dtype(np.int64),
(np.dtype(np.bool), "cumsum"): np.dtype(np.int64),
(np.dtype(np.bool), "sum"): np.dtype(np.int64),
}
return d.get((dtype, how), dtype)
from pandas.core.arrays.boolean import BooleanDtype
from pandas.core.arrays.integer import Int64Dtype

if how in ["add", "cumsum", "sum"] and (dtype == np.dtype(np.bool)):
return np.dtype(np.int64)
elif how in ["add", "cumsum", "sum"] and isinstance(dtype, BooleanDtype):
return Int64Dtype()
return dtype


def maybe_cast_to_extension_array(cls: Type["ExtensionArray"], obj, dtype=None):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ def _cython_operation(
values = values.view("int64")
is_numeric = True
elif is_bool_dtype(values.dtype):
values = ensure_float64(values)
values = ensure_int_or_float(values)
elif is_integer_dtype(values):
# we use iNaT for the missing value on ints
# so pre-convert to guard this condition
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/extension/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,23 @@ def test_in_numeric_groupby(self, data_for_grouping):

tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("min_count", [0, 10])
def test_groupby_sum_mincount(self, data_for_grouping, min_count):
    """
    Check that a grouped ``sum`` over the fixture column honours ``min_count``.

    The frame groups seven rows into keys 1, 2 and 3 (three, two and two
    rows respectively).  With ``min_count=0`` the per-group totals are
    expected; with ``min_count=10`` -- more rows than any group holds --
    every group is expected to be ``pd.NA``.  In both cases the result
    column must have dtype ``Int64``.
    """
    frame = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping})
    summed = frame.groupby("A").sum(min_count=min_count)

    # Only the values differ between the two parametrized cases; the
    # index and the dtype of the expected frame are the same.
    totals = [3, 0, 0] if min_count == 0 else [pd.NA] * 3
    wanted = pd.DataFrame(
        {"B": pd.array(totals, dtype="Int64")},
        index=pd.Index([1, 2, 3], name="A"),
    )
    tm.assert_frame_equal(summed, wanted)


class TestNumericReduce(base.BaseNumericReduceTests):
def check_reduce(self, s, op_name, skipna):
Expand Down