From 9e57d6d212a4edf9227c137cf56ca92239dfb4f5 Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Tue, 21 Dec 2021 13:39:55 -0500
Subject: [PATCH 1/9] fix column_arrays for array manager

---
 pandas/core/internals/array_manager.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
index 09f16a2ddab67..06849bffff5ca 100644
--- a/pandas/core/internals/array_manager.py
+++ b/pandas/core/internals/array_manager.py
@@ -794,7 +794,14 @@ def column_arrays(self) -> list[ArrayLike]:
         """
         Used in the JSON C code to access column arrays.
         """
-        return self.arrays
+
+        def convert_array(arr: ArrayLike) -> ArrayLike:
+            if isinstance(arr, ExtensionArray):
+                return arr.to_numpy()
+            else:
+                return arr
+
+        return [convert_array(arr) for arr in self.arrays]
 
     def iset(
         self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False

From 732f133d884d8000981910201ea535cefe7d7d27 Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Wed, 9 Feb 2022 13:03:18 -0500
Subject: [PATCH 2/9] TYP: fix return types for groupby.size(),
 groupby.count(), groupby.apply()

---
 pandas/core/groupby/generic.py | 17 ++++++++++++++++-
 pandas/core/groupby/groupby.py |  2 --
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 71cef46950e12..7d89712fcc211 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -240,7 +240,7 @@ def _iterate_slices(self) -> Iterable[Series]:
             input="series", examples=_apply_docs["series_examples"]
         )
     )
-    def apply(self, func, *args, **kwargs):
+    def apply(self, func, *args, **kwargs) -> Series:
         return super().apply(func, *args, **kwargs)
 
     @doc(_agg_template, examples=_agg_examples_doc, klass="Series")
@@ -587,6 +587,12 @@ def nunique(self, dropna: bool = True) -> Series:
     def describe(self, **kwargs):
         return super().describe(**kwargs)
 
+    def count(self) -> Series:
+        return cast(Series, super().count())
+
+    def size(self) -> Series:
+        return cast(Series, super().size())
+
     def value_counts(
         self,
         normalize: bool = False,
@@ -764,6 +770,9 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
 
     _apply_allowlist = base.dataframe_apply_allowlist
 
+    def apply(self, func, *args, **kwargs) -> DataFrame:
+        return super().apply(func, *args, **kwargs)
+
     _agg_examples_doc = dedent(
         """
     Examples
@@ -1535,6 +1544,12 @@ def nunique(self, dropna: bool = True) -> DataFrame:
 
         return results
 
+    def count(self) -> DataFrame:
+        return cast(DataFrame, super().count())
+
+    def size(self) -> DataFrame:
+        return cast(DataFrame, super().size())
+
     @Appender(DataFrame.idxmax.__doc__)
     def idxmax(self, axis=0, skipna: bool = True):
         axis = DataFrame._get_axis_number(axis)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 4eb907e06adf1..82299d398cefc 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1827,7 +1827,6 @@ def all(self, skipna: bool = True):
         """
         return self._bool_agg("all", skipna)
 
-    @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
     def count(self) -> Series | DataFrame:
@@ -2135,7 +2134,6 @@ def sem(self, ddof: int = 1):
             result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs])
         return result
 
-    @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
     def size(self) -> DataFrame | Series:

From 92992dbb74a9a4a8eb0c7d6e9ede4495cd4b6b75 Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Wed, 9 Feb 2022 15:00:52 -0500
Subject: [PATCH 3/9] add comment that change is about typing

---
 pandas/core/groupby/generic.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 7d89712fcc211..e933077c84c86 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -587,6 +587,8 @@ def nunique(self, dropna: bool = True) -> Series:
     def describe(self, **kwargs):
         return super().describe(**kwargs)
 
+    # GH45875 - Added these methods to handle typing
+    # Should be able to add @final to groupby.py at some point
     def count(self) -> Series:
         return cast(Series, super().count())
 
@@ -769,6 +771,8 @@ def nsmallest(self, n: int = 5, keep: str = "first"):
 class DataFrameGroupBy(GroupBy[DataFrame]):
 
     _apply_allowlist = base.dataframe_apply_allowlist
+    # GH45875 - Added this method to handle typing
+    # Should be able to add @final to groupby.py at some point
 
     def apply(self, func, *args, **kwargs) -> DataFrame:
         return super().apply(func, *args, **kwargs)
@@ -1544,6 +1548,8 @@ def nunique(self, dropna: bool = True) -> DataFrame:
 
         return results
 
+    # GH45875 - Added these methods to handle typing
+    # Should be able to add @final to groupby.py at some point
     def count(self) -> DataFrame:
         return cast(DataFrame, super().count())
 

From 0377ba9cf4e0a27850b686556df3649a07ae597e Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Wed, 9 Feb 2022 16:15:47 -0500
Subject: [PATCH 4/9] change spacing on comment

---
 pandas/core/groupby/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index e933077c84c86..ebc45c2675534 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -771,9 +771,9 @@ def nsmallest(self, n: int = 5, keep: str = "first"):
 class DataFrameGroupBy(GroupBy[DataFrame]):
 
     _apply_allowlist = base.dataframe_apply_allowlist
+
     # GH45875 - Added this method to handle typing
     # Should be able to add @final to groupby.py at some point
-
     def apply(self, func, *args, **kwargs) -> DataFrame:
         return super().apply(func, *args, **kwargs)
 

From bb8643e748ea2362c945f255f2934c1049a79ced Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Thu, 10 Feb 2022 07:39:29 -0500
Subject: [PATCH 5/9] TYP: use NDFrameT return types instead of subclass overrides

---
 pandas/core/groupby/generic.py | 21 ---------------------
 pandas/core/groupby/groupby.py | 15 +++++++++------
 2 files changed, 9 insertions(+), 27 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index ebc45c2675534..8897467dc79a7 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -587,14 +587,6 @@ def nunique(self, dropna: bool = True) -> Series:
     def describe(self, **kwargs):
         return super().describe(**kwargs)
 
-    # GH45875 - Added these methods to handle typing
-    # Should be able to add @final to groupby.py at some point
-    def count(self) -> Series:
-        return cast(Series, super().count())
-
-    def size(self) -> Series:
-        return cast(Series, super().size())
-
     def value_counts(
         self,
         normalize: bool = False,
@@ -772,11 +764,6 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
 
     _apply_allowlist = base.dataframe_apply_allowlist
 
-    # GH45875 - Added this method to handle typing
-    # Should be able to add @final to groupby.py at some point
-    def apply(self, func, *args, **kwargs) -> DataFrame:
-        return super().apply(func, *args, **kwargs)
-
     _agg_examples_doc = dedent(
         """
     Examples
@@ -1548,14 +1535,6 @@ def nunique(self, dropna: bool = True) -> DataFrame:
 
         return results
 
-    # GH45875 - Added these methods to handle typing
-    # Should be able to add @final to groupby.py at some point
-    def count(self) -> DataFrame:
-        return cast(DataFrame, super().count())
-
-    def size(self) -> DataFrame:
-        return cast(DataFrame, super().size())
-
     @Appender(DataFrame.idxmax.__doc__)
     def idxmax(self, axis=0, skipna: bool = True):
         axis = DataFrame._get_axis_number(axis)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 82299d398cefc..1178ae40e2713 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -923,7 +923,7 @@ def _make_wrapper(self, name: str) -> Callable:
             # as are not passed directly but in the grouper
             f = getattr(self._obj_with_exclusions, name)
             if not isinstance(f, types.MethodType):
-                return self.apply(lambda self: getattr(self, name))
+                return cast(Callable, self.apply(lambda self: getattr(self, name)))
 
         f = getattr(type(self._obj_with_exclusions), name)
         sig = inspect.signature(f)
@@ -1372,7 +1372,7 @@ def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs)
             input="dataframe", examples=_apply_docs["dataframe_examples"]
         )
     )
-    def apply(self, func, *args, **kwargs):
+    def apply(self, func, *args, **kwargs) -> NDFrameT:
 
         func = com.is_builtin_func(func)
 
@@ -1432,7 +1432,7 @@ def _python_apply_general(
         f: Callable,
         data: DataFrame | Series,
         not_indexed_same: bool | None = None,
-    ) -> DataFrame | Series:
+    ) -> NDFrameT:
         """
         Apply function f in python space
 
@@ -1827,9 +1827,10 @@ def all(self, skipna: bool = True):
         """
         return self._bool_agg("all", skipna)
 
+    @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
-    def count(self) -> Series | DataFrame:
+    def count(self) -> NDFrameT:  # Series | DataFrame:
         """
         Compute count of group, excluding missing values.
 
@@ -2134,9 +2135,10 @@ def sem(self, ddof: int = 1):
             result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs])
         return result
 
+    @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
-    def size(self) -> DataFrame | Series:
+    def size(self) -> NDFrameT:
         """
         Compute group sizes.
 
@@ -2158,7 +2160,8 @@ def size(self) -> DataFrame | Series:
             # Item "None" of "Optional[Series]" has no attribute "reset_index"
             result = result.rename("size").reset_index()  # type: ignore[union-attr]
 
-        return self._reindex_output(result, fill_value=0)
+        # GH 45875 cast ensures result will be Series or DataFrame, as appropriate
+        return cast(NDFrameT, self._reindex_output(result, fill_value=0))
 
     @final
     @doc(_groupby_agg_method_template, fname="sum", no=True, mc=0)

From 665b03844331e80a49c1eb740ff07878d58dd476 Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Thu, 10 Feb 2022 07:47:08 -0500
Subject: [PATCH 6/9] remove spurious comment

---
 pandas/core/groupby/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 1178ae40e2713..8406be4d88fa1 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1830,7 +1830,7 @@ def all(self, skipna: bool = True):
     @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
-    def count(self) -> NDFrameT:  # Series | DataFrame:
+    def count(self) -> NDFrameT:
         """
         Compute count of group, excluding missing values.
 

From b1b5029bbbc275a796f94fe84ace7a39a728e2e3 Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Thu, 10 Feb 2022 08:48:22 -0500
Subject: [PATCH 7/9] remove changes for size()

---
 pandas/core/groupby/groupby.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 8406be4d88fa1..554be1e8969d6 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -2138,7 +2138,7 @@ def sem(self, ddof: int = 1):
     @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
-    def size(self) -> NDFrameT:
+    def size(self) -> DataFrame | Series:
         """
         Compute group sizes.
 
@@ -2160,8 +2160,7 @@ def size(self) -> NDFrameT:
             # Item "None" of "Optional[Series]" has no attribute "reset_index"
             result = result.rename("size").reset_index()  # type: ignore[union-attr]
 
-        # GH 45875 cast ensures result will be Series or DataFrame, as appropriate
-        return cast(NDFrameT, self._reindex_output(result, fill_value=0))
+        return self._reindex_output(result, fill_value=0)
 
     @final
     @doc(_groupby_agg_method_template, fname="sum", no=True, mc=0)

From ddb824d38dc17bb4413bc8e01dad24eadf96a94b Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Thu, 10 Feb 2022 09:36:46 -0500
Subject: [PATCH 8/9] add comment on cast

---
 pandas/core/groupby/groupby.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 554be1e8969d6..7ea6f3aaa8a7c 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -923,6 +923,8 @@ def _make_wrapper(self, name: str) -> Callable:
             # as are not passed directly but in the grouper
             f = getattr(self._obj_with_exclusions, name)
             if not isinstance(f, types.MethodType):
+                # cast silences mypy error: Incompatible return value type
+                # (got "NDFrameT", expected "Callable[..., Any]")  [return-value]
                 return cast(Callable, self.apply(lambda self: getattr(self, name)))
 
         f = getattr(type(self._obj_with_exclusions), name)

From cac827c791ab483f6b2a84f94d82508b04e61484 Mon Sep 17 00:00:00 2001
From: Irv Lustig <irv@princeton.com>
Date: Mon, 7 Mar 2022 13:43:11 -0500
Subject: [PATCH 9/9] fix return type of groupby.diff

---
 pandas/core/groupby/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index ff09e8608eed0..9886ab53fb9f3 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -3461,7 +3461,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
     @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
-    def diff(self, periods: int = 1, axis: int = 0) -> Series | DataFrame:
+    def diff(self, periods: int = 1, axis: int = 0) -> NDFrameT:
         """
         First discrete difference of element.