Skip to content

REF: de-duplicate _apply_to_group #29628

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 17, 2019
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 26 additions & 24 deletions pandas/_libs/reduction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,24 @@ cdef class _BaseGrouper:

return cached_typ, cached_ityp

cdef inline object _apply_to_group(self,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i would return a tuple(res, intitialized) here, less c-like i think is good generally (and this likely doesn't make it differetnt perf wise as this is object anyhow)

object cached_typ, object cached_ityp,
Slider islider, Slider vslider,
Py_ssize_t group_size, bint* initialized):
cached_ityp._engine.clear_mapping()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add some sort of doc-string

res = self.f(cached_typ)
res = _extract_result(res)
if not initialized[0]:
# On the first pass, we check the output shape to see
# if this looks like a reduction.
initialized[0] = 1
_check_result_array(res, len(self.dummy_arr))

islider.advance(group_size)
vslider.advance(group_size)

return res


cdef class SeriesBinGrouper(_BaseGrouper):
"""
Expand Down Expand Up @@ -217,7 +235,7 @@ cdef class SeriesBinGrouper(_BaseGrouper):
self.typ = series._constructor
self.ityp = series.index._constructor
self.index = series.index.values
self.name = getattr(series, 'name', None)
self.name = series.name

self.dummy_arr, self.dummy_index = self._check_dummy(dummy)

Expand Down Expand Up @@ -265,20 +283,12 @@ cdef class SeriesBinGrouper(_BaseGrouper):
cached_typ, cached_ityp = self._update_cached_objs(
cached_typ, cached_ityp, islider, vslider)

cached_ityp._engine.clear_mapping()
res = self.f(cached_typ)
res = _extract_result(res)
if not initialized:
# On the first pass, we check the output shape to see
# if this looks like a reduction.
initialized = 1
_check_result_array(res, len(self.dummy_arr))
res = self._apply_to_group(cached_typ, cached_ityp,
islider, vslider,
group_size, &initialized)

result[i] = res

islider.advance(group_size)
vslider.advance(group_size)

finally:
# so we don't free the wrong memory
islider.reset()
Expand Down Expand Up @@ -322,7 +332,7 @@ cdef class SeriesGrouper(_BaseGrouper):
self.typ = series._constructor
self.ityp = series.index._constructor
self.index = series.index.values
self.name = getattr(series, 'name', None)
self.name = series.name

self.dummy_arr, self.dummy_index = self._check_dummy(dummy)
self.ngroups = ngroups
Expand Down Expand Up @@ -367,20 +377,12 @@ cdef class SeriesGrouper(_BaseGrouper):
cached_typ, cached_ityp = self._update_cached_objs(
cached_typ, cached_ityp, islider, vslider)

cached_ityp._engine.clear_mapping()
res = self.f(cached_typ)
res = _extract_result(res)
if not initialized:
# On the first pass, we check the output shape to see
# if this looks like a reduction.
initialized = 1
_check_result_array(res, len(self.dummy_arr))
res = self._apply_to_group(cached_typ, cached_ityp,
islider, vslider,
group_size, &initialized)

result[lab] = res
counts[lab] = group_size
islider.advance(group_size)
vslider.advance(group_size)

group_size = 0

finally:
Expand Down