From e8c6e4cda5c49b78e800cc842256ed4668d8d79b Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 14 Jul 2023 16:59:23 -0700
Subject: [PATCH 1/6] BUG: Series.groupby.size returning int64 for masked and
 arrow types

---
 doc/source/whatsnew/v2.1.0.rst       |  1 +
 pandas/core/groupby/groupby.py       | 14 ++++++++++++--
 pandas/tests/extension/test_arrow.py |  7 +++++++
 pandas/tests/groupby/test_size.py    |  8 ++++++++
 4 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index b7cc254d5c7e5..3c2c7f3147584 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -553,6 +553,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupBy.apply` raising a ``TypeError`` when selecting multiple columns and providing a function that returns ``np.ndarray`` results (:issue:`18930`)
 - Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`)
 - Bug in :meth:`GroupBy.quantile` may implicitly sort the result index with ``sort=False`` (:issue:`53009`)
+- Bug in :meth:`SeriesGroupBy.size` where the dtype would be ``np.int64`` for data with :class:`ArrowDtype` or masked dtypes (e.g. ``Int64``) (:issue:`53831`)
 - Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`)
 - Bug in :meth:`DataFrameGroupby.resample` with ``kind="period"`` raising ``AttributeError`` (:issue:`24103`)
 - Bug in :meth:`Resampler.ohlc` with empty object returning a :class:`Series` instead of empty :class:`DataFrame` (:issue:`42902`)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 85ec8c1b86374..183e8cd402dda 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -99,6 +99,7 @@ class providing the base-class of operations.
 from pandas.core._numba import executor
 from pandas.core.apply import warn_alias_replacement
 from pandas.core.arrays import (
+    ArrowExtensionArray,
     BaseMaskedArray,
     Categorical,
     ExtensionArray,
@@ -2890,12 +2891,21 @@ def size(self) -> DataFrame | Series:
         Freq: MS, dtype: int64
         """
         result = self.grouper.size()
+        result_dtype = result.dtype
+        if isinstance(self.obj, Series):
+            if isinstance(self.obj.array, ArrowExtensionArray):
+                result_dtype = "int64[pyarrow]"
+            elif isinstance(self.obj.array, BaseMaskedArray):
+                result_dtype = "Int64"
+        # TODO: For DataFrames what if columns are mixed arrow/numpy/masked?
 
         # GH28330 preserve subclassed Series/DataFrames through calls
         if isinstance(self.obj, Series):
-            result = self._obj_1d_constructor(result, name=self.obj.name)
+            result = self._obj_1d_constructor(
+                result, name=self.obj.name, dtype=result_dtype
+            )
         else:
-            result = self._obj_1d_constructor(result)
+            result = self._obj_1d_constructor(result, dtype=result_dtype)
 
         with com.temp_setattr(self, "as_index", True):
             # size already has the desired behavior in GH#49519, but this makes the
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 7b2bedc531076..7c19343f15a04 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -3121,6 +3121,13 @@ def test_iter_temporal(pa_type):
     assert result == expected
 
 
+def test_groupby_series_size_returns_pa_int(data):
+    ser = pd.Series(data[:3], index=["a", "a", "b"])
+    result = ser.groupby(level=0).size()
+    expected = pd.Series([2, 1], dtype="int64[pyarrow]", index=["a", "b"])
+    tm.assert_series_equal(result, expected)
+
+
 @pytest.mark.parametrize(
     "pa_type", tm.DATETIME_PYARROW_DTYPES + tm.TIMEDELTA_PYARROW_DTYPES
 )
diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py
index b96fe41c26c3e..836eed990cb54 100644
--- a/pandas/tests/groupby/test_size.py
+++ b/pandas/tests/groupby/test_size.py
@@ -95,3 +95,11 @@ def test_size_on_categorical(as_index):
         expected = expected.set_index(["A", "B"])["size"].rename(None)
 
     tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"])
+def test_size_series_masked_type_returns_Int64(dtype):
+    ser = Series([1, 1, 1], index=["a", "a", "b"], dtype=dtype)
+    result = ser.groupby(level=0).size()
+    expected = Series([2, 1], dtype="Int64", index=["a", "b"])
+    tm.assert_series_equal(result, expected)

From 1abafb9d579fff74c65a80e5293cd806c481ebd4 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 14 Jul 2023 17:47:22 -0700
Subject: [PATCH 2/6] Annotate result_dtype in GroupBy.size for typing

---
 pandas/core/groupby/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 183e8cd402dda..d9561b9a3943d 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2891,7 +2891,7 @@ def size(self) -> DataFrame | Series:
         Freq: MS, dtype: int64
         """
         result = self.grouper.size()
-        result_dtype = result.dtype
+        result_dtype: str | np.dtype = result.dtype
         if isinstance(self.obj, Series):
             if isinstance(self.obj.array, ArrowExtensionArray):
                 result_dtype = "int64[pyarrow]"

From 07147e0354509b225daa39eb31949a8d77275b2e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 17 Jul 2023 11:30:00 -0700
Subject: [PATCH 3/6] Add GH issue ref

---
 pandas/tests/extension/test_arrow.py | 1 +
 pandas/tests/groupby/test_size.py    | 1 +
 2 files changed, 2 insertions(+)

diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 7c19343f15a04..f9756810a8988 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -3122,6 +3122,7 @@ def test_iter_temporal(pa_type):
 
 
 def test_groupby_series_size_returns_pa_int(data):
+    # GH 54132
     ser = pd.Series(data[:3], index=["a", "a", "b"])
     result = ser.groupby(level=0).size()
     expected = pd.Series([2, 1], dtype="int64[pyarrow]", index=["a", "b"])
diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py
index 836eed990cb54..e7598ec34fa15 100644
--- a/pandas/tests/groupby/test_size.py
+++ b/pandas/tests/groupby/test_size.py
@@ -99,6 +99,7 @@ def test_size_on_categorical(as_index):
 
 @pytest.mark.parametrize("dtype", ["Int64", "Float64", "boolean"])
 def test_size_series_masked_type_returns_Int64(dtype):
+    # GH 54132
     ser = Series([1, 1, 1], index=["a", "a", "b"], dtype=dtype)
     result = ser.groupby(level=0).size()
     expected = Series([2, 1], dtype="Int64", index=["a", "b"])

From 9af9958e8332f966093021469f0bc922ec627eea Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 18 Jul 2023 11:01:04 -0700
Subject: [PATCH 4/6] Use convert_dtypes

---
 pandas/core/groupby/groupby.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 49d5567efeede..d6c6a273dc385 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2931,21 +2931,28 @@ def size(self) -> DataFrame | Series:
         Freq: MS, dtype: int64
         """
         result = self.grouper.size()
-        result_dtype: str | np.dtype = result.dtype
+        dtype_backend: str | None = None
         if isinstance(self.obj, Series):
             if isinstance(self.obj.array, ArrowExtensionArray):
-                result_dtype = "int64[pyarrow]"
+                dtype_backend = "pyarrow"
             elif isinstance(self.obj.array, BaseMaskedArray):
-                result_dtype = "Int64"
+                dtype_backend = "numpy_nullable"
         # TODO: For DataFrames what if columns are mixed arrow/numpy/masked?
 
         # GH28330 preserve subclassed Series/DataFrames through calls
         if isinstance(self.obj, Series):
-            result = self._obj_1d_constructor(
-                result, name=self.obj.name, dtype=result_dtype
-            )
+            result = self._obj_1d_constructor(result, name=self.obj.name)
         else:
-            result = self._obj_1d_constructor(result, dtype=result_dtype)
+            result = self._obj_1d_constructor(result)
+
+        if dtype_backend is not None:
+            result = result.convert_dtypes(
+                infer_objects=False,
+                convert_string=False,
+                convert_boolean=False,
+                convert_floating=False,
+                dtype_backend=dtype_backend,
+            )
 
         with com.temp_setattr(self, "as_index", True):
             # size already has the desired behavior in GH#49519, but this makes the

From 3783220c9dd2384428e7adc98cf97caff248568e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 18 Jul 2023 12:08:40 -0700
Subject: [PATCH 5/6] Remove typing

---
 pandas/core/groupby/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index d6c6a273dc385..17b1e041deff3 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2931,7 +2931,7 @@ def size(self) -> DataFrame | Series:
         Freq: MS, dtype: int64
         """
         result = self.grouper.size()
-        dtype_backend: str | None = None
+        dtype_backend = None
         if isinstance(self.obj, Series):
             if isinstance(self.obj.array, ArrowExtensionArray):
                 dtype_backend = "pyarrow"

From 12ccd573623c773694c8f01f50dad55c633cce4d Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Tue, 18 Jul 2023 12:12:20 -0700
Subject: [PATCH 6/6] Type better

---
 pandas/core/groupby/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 17b1e041deff3..64d874a31c428 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2931,7 +2931,7 @@ def size(self) -> DataFrame | Series:
         Freq: MS, dtype: int64
         """
         result = self.grouper.size()
-        dtype_backend = None
+        dtype_backend: None | Literal["pyarrow", "numpy_nullable"] = None
         if isinstance(self.obj, Series):
             if isinstance(self.obj.array, ArrowExtensionArray):
                 dtype_backend = "pyarrow"