From 9e57d6d212a4edf9227c137cf56ca92239dfb4f5 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Tue, 21 Dec 2021 13:39:55 -0500 Subject: [PATCH 1/9] fix column_arrays for array manager --- pandas/core/internals/array_manager.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 09f16a2ddab67..06849bffff5ca 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -794,7 +794,14 @@ def column_arrays(self) -> list[ArrayLike]: """ Used in the JSON C code to access column arrays. """ - return self.arrays + + def convert_array(arr: ArrayLike) -> ArrayLike: + if isinstance(arr, ExtensionArray): + return arr.to_numpy() + else: + return arr + + return [convert_array(arr) for arr in self.arrays] def iset( self, loc: int | slice | np.ndarray, value: ArrayLike, inplace: bool = False From 732f133d884d8000981910201ea535cefe7d7d27 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Wed, 9 Feb 2022 13:03:18 -0500 Subject: [PATCH 2/9] TYP: fix return types for groupby.size(), groupby.count(), groupby.apply() --- pandas/core/groupby/generic.py | 17 ++++++++++++++++- pandas/core/groupby/groupby.py | 2 -- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 71cef46950e12..7d89712fcc211 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -240,7 +240,7 @@ def _iterate_slices(self) -> Iterable[Series]: input="series", examples=_apply_docs["series_examples"] ) ) - def apply(self, func, *args, **kwargs): + def apply(self, func, *args, **kwargs) -> Series: return super().apply(func, *args, **kwargs) @doc(_agg_template, examples=_agg_examples_doc, klass="Series") @@ -587,6 +587,12 @@ def nunique(self, dropna: bool = True) -> Series: def describe(self, **kwargs): return super().describe(**kwargs) + def count(self) -> Series: + return cast(Series, super().count()) + + def size(self) -> Series: + return cast(Series, super().size()) + def value_counts( self, normalize: bool = False, @@ -764,6 +770,9 @@ class DataFrameGroupBy(GroupBy[DataFrame]): _apply_allowlist = base.dataframe_apply_allowlist + def apply(self, func, *args, **kwargs) -> DataFrame: + return super().apply(func, *args, **kwargs) + _agg_examples_doc = dedent( """ Examples @@ -1535,6 +1544,12 @@ def nunique(self, dropna: bool = True) -> DataFrame: return results + def count(self) -> DataFrame: + return cast(DataFrame, super().count()) + + def size(self) -> DataFrame: + return cast(DataFrame, super().size()) + @Appender(DataFrame.idxmax.__doc__) def idxmax(self, axis=0, skipna: bool = True): axis = DataFrame._get_axis_number(axis) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 4eb907e06adf1..82299d398cefc 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1827,7 +1827,6 @@ def all(self, skipna: bool = True): """ return self._bool_agg("all", skipna) - @final @Substitution(name="groupby") @Appender(_common_see_also) def count(self) -> Series | DataFrame: @@ -2135,7 +2134,6 @@ def sem(self, ddof: int = 1): result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) return result - @final @Substitution(name="groupby") @Appender(_common_see_also) def size(self) -> DataFrame | Series: From 92992dbb74a9a4a8eb0c7d6e9ede4495cd4b6b75 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Wed, 9 Feb 2022 15:00:52 -0500 Subject: [PATCH 3/9] add comment that change is about typing --- pandas/core/groupby/generic.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7d89712fcc211..e933077c84c86 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -587,6 +587,8 @@ def nunique(self, dropna: bool = True) -> Series: def describe(self, **kwargs): return super().describe(**kwargs) + # GH45875 - Added these methods to handle typing + # Should be able to add @final to groupby.py at some point def count(self) -> Series: return cast(Series, super().count()) @@ -769,6 +771,8 @@ def nsmallest(self, n: int = 5, keep: str = "first"): class DataFrameGroupBy(GroupBy[DataFrame]): _apply_allowlist = base.dataframe_apply_allowlist + # GH45875 - Added this method to handle typing + # Should be able to add @final to groupby.py at some point def apply(self, func, *args, **kwargs) -> DataFrame: return super().apply(func, *args, **kwargs) @@ -1544,6 +1548,8 @@ def nunique(self, dropna: bool = True) -> DataFrame: return results + # GH45875 - Added these methods to handle typing + # Should be able to add @final to groupby.py at some point def count(self) -> DataFrame: return cast(DataFrame, super().count()) From 0377ba9cf4e0a27850b686556df3649a07ae597e Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Wed, 9 Feb 2022 16:15:47 -0500 Subject: [PATCH 4/9] change spacing on comment --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index e933077c84c86..ebc45c2675534 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -771,9 +771,9 @@ def nsmallest(self, n: int = 5, keep: str = "first"): class DataFrameGroupBy(GroupBy[DataFrame]): _apply_allowlist = base.dataframe_apply_allowlist + # GH45875 - Added this method to handle typing # Should be able to add @final to groupby.py at some point - def apply(self, func, *args, **kwargs) -> DataFrame: return super().apply(func, *args, **kwargs) From bb8643e748ea2362c945f255f2934c1049a79ced Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Thu, 10 Feb 2022 07:39:29 -0500 Subject: [PATCH 5/9] better way of keeping the types --- pandas/core/groupby/generic.py | 21 --------------------- pandas/core/groupby/groupby.py | 15 +++++++++------ 2 files changed, 9 insertions(+), 27 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ebc45c2675534..8897467dc79a7 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -587,14 +587,6 @@ def nunique(self, dropna: bool = True) -> Series: def describe(self, **kwargs): return super().describe(**kwargs) - # GH45875 - Added these methods to handle typing - # Should be able to add @final to groupby.py at some point - def count(self) -> Series: - return cast(Series, super().count()) - - def size(self) -> Series: - return cast(Series, super().size()) - def value_counts( self, normalize: bool = False, @@ -772,11 +764,6 @@ class DataFrameGroupBy(GroupBy[DataFrame]): _apply_allowlist = base.dataframe_apply_allowlist - # GH45875 - Added this method to handle typing - # Should be able to add @final to groupby.py at some point - def apply(self, func, *args, **kwargs) -> DataFrame: - return super().apply(func, *args, **kwargs) - _agg_examples_doc = dedent( """ Examples @@ -1548,14 +1535,6 @@ def nunique(self, dropna: bool = True) -> DataFrame: return results - # GH45875 - Added these methods to handle typing - # Should be able to add @final to groupby.py at some point - def count(self) -> DataFrame: - return cast(DataFrame, super().count()) - - def size(self) -> DataFrame: - return cast(DataFrame, super().size()) - @Appender(DataFrame.idxmax.__doc__) def idxmax(self, axis=0, skipna: bool = True): axis = DataFrame._get_axis_number(axis) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 82299d398cefc..1178ae40e2713 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -923,7 +923,7 @@ def _make_wrapper(self, name: str) -> Callable: # as are not passed directly but in the grouper f = getattr(self._obj_with_exclusions, name) if not isinstance(f, types.MethodType): - return self.apply(lambda self: getattr(self, name)) + return cast(Callable, self.apply(lambda self: getattr(self, name))) f = getattr(type(self._obj_with_exclusions), name) sig = inspect.signature(f) @@ -1372,7 +1372,7 @@ def _aggregate_with_numba(self, data, func, *args, engine_kwargs=None, **kwargs) input="dataframe", examples=_apply_docs["dataframe_examples"] ) ) - def apply(self, func, *args, **kwargs): + def apply(self, func, *args, **kwargs) -> NDFrameT: func = com.is_builtin_func(func) @@ -1432,7 +1432,7 @@ def _python_apply_general( f: Callable, data: DataFrame | Series, not_indexed_same: bool | None = None, - ) -> DataFrame | Series: + ) -> NDFrameT: """ Apply function f in python space @@ -1827,9 +1827,10 @@ def all(self, skipna: bool = True): """ return self._bool_agg("all", skipna) + @final @Substitution(name="groupby") @Appender(_common_see_also) - def count(self) -> Series | DataFrame: + def count(self) -> NDFrameT: # Series | DataFrame: """ Compute count of group, excluding missing values. @@ -2134,9 +2135,10 @@ def sem(self, ddof: int = 1): result.iloc[:, result_ilocs] /= np.sqrt(counts.iloc[:, count_ilocs]) return result + @final @Substitution(name="groupby") @Appender(_common_see_also) - def size(self) -> DataFrame | Series: + def size(self) -> NDFrameT: """ Compute group sizes. @@ -2158,7 +2160,8 @@ def size(self) -> DataFrame | Series: # Item "None" of "Optional[Series]" has no attribute "reset_index" result = result.rename("size").reset_index() # type: ignore[union-attr] - return self._reindex_output(result, fill_value=0) + # GH 45875 cast ensures result will be Series or DataFrame, as appropriate + return cast(NDFrameT, self._reindex_output(result, fill_value=0)) @final @doc(_groupby_agg_method_template, fname="sum", no=True, mc=0) From 665b03844331e80a49c1eb740ff07878d58dd476 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Thu, 10 Feb 2022 07:47:08 -0500 Subject: [PATCH 6/9] remove spurious comment --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1178ae40e2713..8406be4d88fa1 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1830,7 +1830,7 @@ def all(self, skipna: bool = True): @final @Substitution(name="groupby") @Appender(_common_see_also) - def count(self) -> NDFrameT: # Series | DataFrame: + def count(self) -> NDFrameT: """ Compute count of group, excluding missing values. From b1b5029bbbc275a796f94fe84ace7a39a728e2e3 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Thu, 10 Feb 2022 08:48:22 -0500 Subject: [PATCH 7/9] remove changes for size() --- pandas/core/groupby/groupby.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 8406be4d88fa1..554be1e8969d6 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2138,7 +2138,7 @@ def sem(self, ddof: int = 1): @final @Substitution(name="groupby") @Appender(_common_see_also) - def size(self) -> NDFrameT: + def size(self) -> DataFrame | Series: """ Compute group sizes. @@ -2160,8 +2160,7 @@ def size(self) -> NDFrameT: # Item "None" of "Optional[Series]" has no attribute "reset_index" result = result.rename("size").reset_index() # type: ignore[union-attr] - # GH 45875 cast ensures result will be Series or DataFrame, as appropriate - return cast(NDFrameT, self._reindex_output(result, fill_value=0)) + return self._reindex_output(result, fill_value=0) @final @doc(_groupby_agg_method_template, fname="sum", no=True, mc=0) From ddb824d38dc17bb4413bc8e01dad24eadf96a94b Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Thu, 10 Feb 2022 09:36:46 -0500 Subject: [PATCH 8/9] add comment on cast --- pandas/core/groupby/groupby.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 554be1e8969d6..7ea6f3aaa8a7c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -923,6 +923,8 @@ def _make_wrapper(self, name: str) -> Callable: # as are not passed directly but in the grouper f = getattr(self._obj_with_exclusions, name) if not isinstance(f, types.MethodType): + # error: Incompatible return value type + # (got "NDFrameT", expected "Callable[..., Any]") [return-value] return cast(Callable, self.apply(lambda self: getattr(self, name))) f = getattr(type(self._obj_with_exclusions), name) From cac827c791ab483f6b2a84f94d82508b04e61484 Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Mon, 7 Mar 2022 13:43:11 -0500 Subject: [PATCH 9/9] fix return type of groupby.diff --- pandas/core/groupby/groupby.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index ff09e8608eed0..9886ab53fb9f3 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -3461,7 +3461,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): @final @Substitution(name="groupby") @Appender(_common_see_also) - def diff(self, periods: int = 1, axis: int = 0) -> Series | DataFrame: + def diff(self, periods: int = 1, axis: int = 0) -> NDFrameT: """ First discrete difference of element.