Skip to content

Commit ce95f4e

Browse files
authored
REGR: groupby.count returning string dtype instead of numeric for string input (#54752)
REGR: count returning string dtype instead of numeric for string input
1 parent 6d021ac commit ce95f4e

File tree

2 files changed: 15 additions and 1 deletion.

2 files changed

+15
-1
lines changed

pandas/core/groupby/groupby.py

+4 -1 (4 additions, 1 deletion)
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ class providing the base-class of operations.
107107
IntegerArray,
108108
SparseArray,
109109
)
110+
from pandas.core.arrays.string_ import StringDtype
110111
from pandas.core.base import (
111112
PandasObject,
112113
SelectionMixin,
@@ -2281,7 +2282,9 @@ def hfunc(bvalues: ArrayLike) -> ArrayLike:
22812282
return IntegerArray(
22822283
counted[0], mask=np.zeros(counted.shape[1], dtype=np.bool_)
22832284
)
2284-
elif isinstance(bvalues, ArrowExtensionArray):
2285+
elif isinstance(bvalues, ArrowExtensionArray) and not isinstance(
2286+
bvalues.dtype, StringDtype
2287+
):
22852288
return type(bvalues)._from_sequence(counted[0])
22862289
if is_series:
22872290
assert counted.ndim == 2

pandas/tests/groupby/test_counting.py

+11
Original file line numberDiff line numberDiff line change
@@ -379,3 +379,14 @@ def __eq__(self, other):
379379
result = df.groupby("grp").count()
380380
expected = DataFrame({"a": [2, 2]}, index=Index(list("ab"), name="grp"))
381381
tm.assert_frame_equal(result, expected)
382+
383+
384+
def test_count_arrow_string_array(any_string_dtype):
385+
# GH#54751
386+
pytest.importorskip("pyarrow")
387+
df = DataFrame(
388+
{"a": [1, 2, 3], "b": Series(["a", "b", "a"], dtype=any_string_dtype)}
389+
)
390+
result = df.groupby("a").count()
391+
expected = DataFrame({"b": 1}, index=Index([1, 2, 3], name="a"))
392+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)