REF: de-lazify Resampler init #41245


Merged · 4 commits · May 2, 2021
12 changes: 2 additions & 10 deletions pandas/core/groupby/groupby.py
@@ -546,6 +546,7 @@ class BaseGroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]):
axis: int
grouper: ops.BaseGrouper
obj: FrameOrSeries
group_keys: bool

@final
def __len__(self) -> int:
@@ -556,26 +557,17 @@ def __repr__(self) -> str:
# TODO: Better repr for GroupBy object
return object.__repr__(self)

def _assure_grouper(self) -> None:
"""
We create the grouper on instantiation sub-classes may have a
different policy.
"""
pass

@final
@property
def groups(self) -> dict[Hashable, np.ndarray]:
"""
Dict {group name -> group labels}.
"""
self._assure_grouper()
return self.grouper.groups

@final
@property
def ngroups(self) -> int:
self._assure_grouper()
return self.grouper.ngroups

@final
@@ -584,7 +576,6 @@ def indices(self):
"""
Dict {group name -> group indices}.
"""
self._assure_grouper()
return self.grouper.indices

@final
@@ -721,6 +712,7 @@ def get_group(self, name, obj=None):

return obj._take_with_is_copy(inds, axis=self.axis)

@final
def __iter__(self) -> Iterator[tuple[Hashable, FrameOrSeries]]:
"""
Groupby iterator.
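For context on the groupby.py side of this diff: with the lazy `_assure_grouper()` hook removed, `groups`, `ngroups`, and `indices` read `self.grouper` directly, which requires subclasses such as `Resampler` to have a fully built grouper by the end of `__init__`. A minimal sketch of the user-visible behavior, assuming a pandas build that includes this change (the frame and frequency below are purely illustrative):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(
    {"value": np.arange(4)},
    index=pd.date_range("2021-01-01", periods=4, freq="12H"),
)

r = df.resample("D")

# These accessors now hit r.grouper directly; there is no lazy
# _assure_grouper() call left to build it on first use.
print(r.ngroups)   # 2 daily bins
print(r.indices)   # row positions falling into each bin
print(r.groups)    # bin labels exposed by the underlying BinGrouper
```

The same accessors keep working for ordinary `DataFrame.groupby` objects, where the grouper has always been built eagerly.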
109 changes: 38 additions & 71 deletions pandas/core/resample.py
@@ -6,6 +6,7 @@
from typing import (
TYPE_CHECKING,
Callable,
Hashable,
no_type_check,
)

@@ -101,8 +102,8 @@ class Resampler(BaseGroupBy, PandasObject):
Parameters
----------
obj : pandas object
groupby : a TimeGrouper object
obj : Series or DataFrame
groupby : TimeGrouper
axis : int, default 0
kind : str or None
'period', 'timestamp' to override default index treatment
@@ -116,10 +117,8 @@ class Resampler(BaseGroupBy, PandasObject):
After resampling, see aggregate, apply, and transform functions.
"""

# error: Incompatible types in assignment (expression has type
# "Optional[BinGrouper]", base class "BaseGroupBy" defined the type as
# "BaseGrouper")
grouper: BinGrouper | None # type: ignore[assignment]
grouper: BinGrouper
exclusions: frozenset[Hashable] = frozenset() # for SelectionMixin compat

# to the groupby descriptor
_attributes = [
@@ -134,7 +133,14 @@ class Resampler(BaseGroupBy, PandasObject):
"offset",
]

def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs):
def __init__(
self,
obj: FrameOrSeries,
groupby: TimeGrouper,
axis: int = 0,
kind=None,
**kwargs,
):
self.groupby = groupby
self.keys = None
self.sort = True
@@ -143,12 +149,9 @@ def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs):
self.squeeze = False
self.group_keys = True
self.as_index = True
self.exclusions = set()
self.binner = None
self.grouper = None

if self.groupby is not None:
self.groupby._set_grouper(self._convert_obj(obj), sort=True)
self.groupby._set_grouper(self._convert_obj(obj), sort=True)
self.binner, self.grouper = self._get_binner()

@final
def _shallow_copy(self, obj, **kwargs):
@@ -183,25 +186,12 @@ def __getattr__(self, attr: str):

return object.__getattribute__(self, attr)

def __iter__(self):
"""
Resampler iterator.
Returns
-------
Generator yielding sequence of (name, subsetted object)
for each group.
See Also
--------
GroupBy.__iter__ : Generator yielding sequence for each group.
"""
self._set_binner()
return super().__iter__()

# error: Signature of "obj" incompatible with supertype "BaseGroupBy"
@property
def obj(self):
return self.groupby.obj
def obj(self) -> FrameOrSeries: # type: ignore[override]
# error: Incompatible return value type (got "Optional[Any]",
# expected "FrameOrSeries")
return self.groupby.obj # type: ignore[return-value]

@property
def ax(self):
@@ -218,32 +208,24 @@ def _from_selection(self) -> bool:
self.groupby.key is not None or self.groupby.level is not None
)

def _convert_obj(self, obj):
def _convert_obj(self, obj: FrameOrSeries) -> FrameOrSeries:
"""
Provide any conversions for the object in order to correctly handle.
Parameters
----------
obj : the object to be resampled
obj : Series or DataFrame
Returns
-------
obj : converted object
Series or DataFrame
"""
return obj._consolidate()

def _get_binner_for_time(self):
raise AbstractMethodError(self)

def _set_binner(self):
"""
Setup our binners.
Cache these as we are an immutable object
"""
if self.binner is None:
self.binner, self.grouper = self._get_binner()

@final
def _get_binner(self):
"""
Create the BinGrouper, assume that self.set_grouper(obj)
@@ -254,12 +236,6 @@ def _get_binner(self):
bin_grouper = BinGrouper(bins, binlabels, indexer=self.groupby.indexer)
return binner, bin_grouper

def _assure_grouper(self):
"""
Make sure that we are creating our binner & grouper.
"""
self._set_binner()

@Substitution(
klass="Resampler",
examples="""
@@ -349,7 +325,6 @@ def pipe(
)
def aggregate(self, func, *args, **kwargs):

self._set_binner()
result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
if result is None:
how = func
@@ -400,7 +375,6 @@ def _gotitem(self, key, ndim: int, subset=None):
subset : object, default None
subset to act on
"""
self._set_binner()
grouper = self.grouper
if subset is None:
subset = self.obj
@@ -417,7 +391,6 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
Re-evaluate the obj with a groupby aggregation.
"""
if grouper is None:
self._set_binner()
grouper = self.grouper

obj = self._selected_obj
@@ -1050,8 +1023,8 @@ def __init__(self, obj, parent=None, groupby=None, **kwargs):
for attr in self._attributes:
setattr(self, attr, kwargs.get(attr, getattr(parent, attr)))

# error: Too many arguments for "__init__" of "object"
super().__init__(None) # type: ignore[call-arg]
self.binner = parent.binner

self._groupby = groupby
self._groupby.mutated = True
self._groupby.grouper.mutated = True
@@ -1137,7 +1110,6 @@ def _downsample(self, how, **kwargs):
how : string / cython mapped function
**kwargs : kw args passed to how function
"""
self._set_binner()
how = com.get_cython_func(how) or how
ax = self.ax
obj = self._selected_obj
@@ -1154,7 +1126,7 @@ def _downsample(self, how, **kwargs):
# error: Item "None" of "Optional[Any]" has no attribute "binlabels"
if (
(ax.freq is not None or ax.inferred_freq is not None)
and len(self.grouper.binlabels) > len(ax) # type: ignore[union-attr]
and len(self.grouper.binlabels) > len(ax)
and how is None
):

@@ -1196,7 +1168,6 @@ def _upsample(self, method, limit=None, fill_value=None):
.fillna: Fill NA/NaN values using the specified method.
"""
self._set_binner()
if self.axis:
raise AssertionError("axis must be 0")
if self._from_selection:
@@ -1257,7 +1228,7 @@ def _get_binner_for_time(self):
return super()._get_binner_for_time()
return self.groupby._get_period_bins(self.ax)

def _convert_obj(self, obj):
def _convert_obj(self, obj: FrameOrSeries) -> FrameOrSeries:
obj = super()._convert_obj(obj)

if self._from_selection:
@@ -1336,7 +1307,6 @@ def _upsample(self, method, limit=None, fill_value=None):
if self.kind == "timestamp":
return super()._upsample(method, limit=limit, fill_value=fill_value)

self._set_binner()
ax = self.ax
obj = self.obj
new_index = self.binner
@@ -1349,9 +1319,7 @@ def _upsample(self, method, limit=None, fill_value=None):
new_obj = _take_new_index(
obj,
indexer,
# error: Argument 3 to "_take_new_index" has incompatible type
# "Optional[Any]"; expected "Index"
new_index, # type: ignore[arg-type]
new_index,
axis=self.axis,
)
return self._wrap_result(new_obj)
@@ -1511,20 +1479,20 @@ def __init__(
else:
try:
self.origin = Timestamp(origin)
except Exception as e:
except (ValueError, TypeError) as err:
raise ValueError(
"'origin' should be equal to 'epoch', 'start', 'start_day', "
"'end', 'end_day' or "
f"should be a Timestamp convertible type. Got '{origin}' instead."
) from e
) from err

try:
self.offset = Timedelta(offset) if offset is not None else None
except Exception as e:
except (ValueError, TypeError) as err:
raise ValueError(
"'offset' should be a Timedelta convertible type. "
f"Got '{offset}' instead."
) from e
) from err

# always sort time groupers
kwargs["sort"] = True
@@ -1585,10 +1553,9 @@ def _get_resampler(self, obj, kind=None):
def _get_grouper(self, obj, validate: bool = True):
# create the resampler and return our binner
r = self._get_resampler(obj)
r._set_binner()
return r.binner, r.grouper, r.obj

def _get_time_bins(self, ax):
def _get_time_bins(self, ax: DatetimeIndex):
if not isinstance(ax, DatetimeIndex):
raise TypeError(
"axis must be a DatetimeIndex, but got "
@@ -1964,13 +1931,13 @@ def _insert_nat_bin(


def _adjust_dates_anchored(
first,
last,
freq,
first: Timestamp,
last: Timestamp,
freq: Tick,
closed: Literal["right", "left"] = "right",
origin="start_day",
offset: Timedelta | None = None,
):
) -> tuple[Timestamp, Timestamp]:
# First and last offsets should be calculated from the start day to fix an
# error caused by resampling across multiple days when a one day period is
# not a multiple of the frequency. See GH 8683
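Rounding out the resample.py side: `Resampler.__init__` now calls `self.groupby._set_grouper(...)` and `self._get_binner()` unconditionally, so `binner` and `grouper` exist as soon as the object is constructed and the scattered `_set_binner()` calls could be dropped. A rough sketch of that effect, assuming a pandas build that includes this change (the data and frequency are illustrative only; `grouper` and `binner` are internal attributes inspected here purely for demonstration):

```python
import pandas as pd

df = pd.DataFrame(
    {"x": range(6)},
    index=pd.date_range("2021-01-01", periods=6, freq="8H"),
)

r = df.resample("D")

# binner and grouper are built inside __init__ via _get_binner(), so they
# are available immediately rather than on the first aggregation call.
print(type(r.grouper).__name__)  # BinGrouper
print(r.binner)                  # DatetimeIndex of daily bin labels

# Iteration and aggregation use the eagerly built grouper directly.
for label, chunk in r:
    print(label, len(chunk))
print(r.sum())
```

Repeated aggregations on the same `Resampler` then reuse the bins computed at construction time instead of re-checking for them inside each method.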