-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
CLN: remove index keyword from _wrap_aggregated_output #41128
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 4 commits
Commits
Show all changes
6 commits
Select commit
Hold shift + click to select a range
81897cd
REF: remove index kwarg from _wrap_aggregated_output
jbrockmendel 6328016
REF: remove _wrap_series_output
jbrockmendel f3b1baa
REF: remove unnecessary layer
jbrockmendel 90b662a
revert incorrect comment
jbrockmendel 941763b
PERF: next(iter(...))
jbrockmendel 1ab60ba
next(iter(...))
jbrockmendel File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -336,14 +336,12 @@ def _aggregate_multiple_funcs(self, arg): | |
# let higher level handle | ||
return results | ||
|
||
# Argument 1 to "_wrap_aggregated_output" of "SeriesGroupBy" has | ||
# incompatible type "Dict[OutputKey, Union[DataFrame, | ||
# Series]]"; | ||
# expected "Mapping[OutputKey, Union[Series, ndarray]]" | ||
output = self._wrap_aggregated_output( | ||
results, index=None # type: ignore[arg-type] | ||
) | ||
return self.obj._constructor_expanddim(output, columns=columns) | ||
indexed_output = {key.position: val for key, val in results.items()} | ||
output = self.obj._constructor_expanddim(indexed_output, index=None) | ||
output.columns = Index(key.label for key in results) | ||
|
||
output = self._reindex_output(output) | ||
return output | ||
|
||
def _cython_agg_general( | ||
self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 | ||
|
@@ -371,78 +369,36 @@ def _cython_agg_general( | |
if not output: | ||
raise DataError("No numeric types to aggregate") | ||
|
||
# error: Argument 1 to "_wrap_aggregated_output" of "BaseGroupBy" has | ||
# incompatible type "Dict[OutputKey, Union[ndarray, DatetimeArray]]"; | ||
# expected "Mapping[OutputKey, ndarray]" | ||
return self._wrap_aggregated_output( | ||
output, index=self.grouper.result_index # type: ignore[arg-type] | ||
) | ||
|
||
# TODO: index should not be Optional - see GH 35490 | ||
def _wrap_series_output( | ||
self, | ||
output: Mapping[base.OutputKey, Series | ArrayLike], | ||
index: Index | None, | ||
) -> FrameOrSeriesUnion: | ||
""" | ||
Wraps the output of a SeriesGroupBy operation into the expected result. | ||
|
||
Parameters | ||
---------- | ||
output : Mapping[base.OutputKey, Union[Series, np.ndarray, ExtensionArray]] | ||
Data to wrap. | ||
index : pd.Index or None | ||
Index to apply to the output. | ||
|
||
Returns | ||
------- | ||
Series or DataFrame | ||
|
||
Notes | ||
----- | ||
In the vast majority of cases output and columns will only contain one | ||
element. The exception is operations that expand dimensions, like ohlc. | ||
""" | ||
indexed_output = {key.position: val for key, val in output.items()} | ||
columns = Index(key.label for key in output) | ||
return self._wrap_aggregated_output(output) | ||
|
||
result: FrameOrSeriesUnion | ||
if len(output) > 1: | ||
result = self.obj._constructor_expanddim(indexed_output, index=index) | ||
result.columns = columns | ||
elif not columns.empty: | ||
result = self.obj._constructor( | ||
indexed_output[0], index=index, name=columns[0] | ||
) | ||
else: | ||
result = self.obj._constructor_expanddim() | ||
|
||
return result | ||
|
||
# TODO: Remove index argument, use self.grouper.result_index, see GH 35490 | ||
def _wrap_aggregated_output( | ||
self, | ||
output: Mapping[base.OutputKey, Series | np.ndarray], | ||
index: Index | None, | ||
) -> FrameOrSeriesUnion: | ||
output: Mapping[base.OutputKey, Series | ArrayLike], | ||
) -> Series: | ||
""" | ||
Wraps the output of a SeriesGroupBy aggregation into the expected result. | ||
|
||
Parameters | ||
---------- | ||
output : Mapping[base.OutputKey, Union[Series, np.ndarray]] | ||
output : Mapping[base.OutputKey, Union[Series, ArrayLike]] | ||
Data to wrap. | ||
|
||
Returns | ||
------- | ||
Series or DataFrame | ||
Series | ||
|
||
Notes | ||
----- | ||
In the vast majority of cases output will only contain one element. | ||
The exception is operations that expand dimensions, like ohlc. | ||
""" | ||
result = self._wrap_series_output(output=output, index=index) | ||
assert len(output) == 1 | ||
|
||
name = self.obj.name | ||
index = self.grouper.result_index | ||
values = list(output.values())[0] | ||
|
||
result = self.obj._constructor(values, index=index, name=name) | ||
return self._reindex_output(result) | ||
|
||
def _wrap_transformed_output( | ||
|
@@ -466,7 +422,10 @@ def _wrap_transformed_output( | |
for consistency with DataFrame methods and _wrap_aggregated_output. | ||
""" | ||
assert len(output) == 1 | ||
result = self._wrap_series_output(output=output, index=self.obj.index) | ||
|
||
name = self.obj.name | ||
values = list(output.values())[0] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like the |
||
result = self.obj._constructor(values, index=self.obj.index, name=name) | ||
|
||
# No transformations increase the ndim of the result | ||
assert isinstance(result, Series) | ||
|
@@ -1115,14 +1074,6 @@ def _iterate_slices(self) -> Iterable[Series]: | |
def _cython_agg_general( | ||
self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 | ||
) -> DataFrame: | ||
agg_mgr = self._cython_agg_manager( | ||
how, alt=alt, numeric_only=numeric_only, min_count=min_count | ||
) | ||
return self._wrap_agged_manager(agg_mgr) | ||
|
||
def _cython_agg_manager( | ||
self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1 | ||
) -> Manager2D: | ||
# Note: we never get here with how="ohlc"; that goes through SeriesGroupBy | ||
|
||
data: Manager2D = self._get_data_to_aggregate() | ||
|
@@ -1186,11 +1137,9 @@ def py_fallback(values: ArrayLike) -> ArrayLike: | |
sgb = get_groupby(obj, self.grouper, observed=True) | ||
result = sgb.aggregate(lambda x: alt(x, axis=self.axis)) | ||
|
||
assert isinstance(result, (Series, DataFrame)) # for mypy | ||
# In the case of object dtype block, it may have been split | ||
# in the operation. We un-split here. | ||
result = result._consolidate() | ||
assert isinstance(result, (Series, DataFrame)) # for mypy | ||
# unwrap DataFrame/Series to get array | ||
mgr = result._mgr | ||
arrays = mgr.arrays | ||
|
@@ -1226,7 +1175,7 @@ def array_func(values: ArrayLike) -> ArrayLike: | |
if not len(new_mgr): | ||
raise DataError("No numeric types to aggregate") | ||
|
||
return new_mgr | ||
return self._wrap_agged_manager(new_mgr) | ||
|
||
def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame: | ||
if self.grouper.nkeys != 1: | ||
|
@@ -1733,7 +1682,6 @@ def _insert_inaxis_grouper_inplace(self, result: DataFrame) -> None: | |
def _wrap_aggregated_output( | ||
self, | ||
output: Mapping[base.OutputKey, Series | np.ndarray], | ||
index: Index | None, | ||
) -> DataFrame: | ||
""" | ||
Wraps the output of DataFrameGroupBy aggregations into the expected result. | ||
|
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tiny perf boost by not creating the intermediate list
gives
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
good idea, updated