From e3f7d24c6c661ded3b97ded142f2b257d7fe7e5e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 30 Apr 2021 15:03:15 -0700 Subject: [PATCH 1/3] REF: de-lazify Resampler init --- pandas/core/groupby/groupby.py | 12 +---- pandas/core/resample.py | 97 +++++++++++----------------------- 2 files changed, 34 insertions(+), 75 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7fe9d7cb49eb5..f6532aea757de 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -546,6 +546,7 @@ class BaseGroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]): axis: int grouper: ops.BaseGrouper obj: FrameOrSeries + group_keys: bool @final def __len__(self) -> int: @@ -556,26 +557,17 @@ def __repr__(self) -> str: # TODO: Better repr for GroupBy object return object.__repr__(self) - def _assure_grouper(self) -> None: - """ - We create the grouper on instantiation sub-classes may have a - different policy. - """ - pass - @final @property def groups(self) -> dict[Hashable, np.ndarray]: """ Dict {group name -> group labels}. """ - self._assure_grouper() return self.grouper.groups @final @property def ngroups(self) -> int: - self._assure_grouper() return self.grouper.ngroups @final @@ -584,7 +576,6 @@ def indices(self): """ Dict {group name -> group indices}. """ - self._assure_grouper() return self.grouper.indices @final @@ -721,6 +712,7 @@ def get_group(self, name, obj=None): return obj._take_with_is_copy(inds, axis=self.axis) + @final def __iter__(self) -> Iterator[tuple[Hashable, FrameOrSeries]]: """ Groupby iterator. diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 141ea4babe970..650d98e94c05e 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -101,8 +101,8 @@ class Resampler(BaseGroupBy, PandasObject): Parameters ---------- - obj : pandas object - groupby : a TimeGrouper object + obj : Series or DataFrame + groupby : TimeGrouper axis : int, default 0 kind : str or None 'period', 'timestamp' to override default index treatment @@ -116,10 +116,7 @@ class Resampler(BaseGroupBy, PandasObject): After resampling, see aggregate, apply, and transform functions. """ - # error: Incompatible types in assignment (expression has type - # "Optional[BinGrouper]", base class "BaseGroupBy" defined the type as - # "BaseGrouper") - grouper: BinGrouper | None # type: ignore[assignment] + grouper: BinGrouper # to the groupby descriptor _attributes = [ @@ -134,7 +131,14 @@ class Resampler(BaseGroupBy, PandasObject): "offset", ] - def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs): + def __init__( + self, + obj: FrameOrSeries, + groupby: TimeGrouper, + axis: int = 0, + kind=None, + **kwargs, + ): self.groupby = groupby self.keys = None self.sort = True @@ -145,10 +149,9 @@ def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs): self.as_index = True self.exclusions = set() self.binner = None - self.grouper = None - if self.groupby is not None: - self.groupby._set_grouper(self._convert_obj(obj), sort=True) + self.groupby._set_grouper(self._convert_obj(obj), sort=True) + self.binner, self.grouper = self._get_binner() @final def _shallow_copy(self, obj, **kwargs): @@ -183,24 +186,9 @@ def __getattr__(self, attr: str): return object.__getattribute__(self, attr) - def __iter__(self): - """ - Resampler iterator. - - Returns - ------- - Generator yielding sequence of (name, subsetted object) - for each group. - - See Also - -------- - GroupBy.__iter__ : Generator yielding sequence for each group. - """ - self._set_binner() - return super().__iter__() - + # error: Signature of "obj" incompatible with supertype "BaseGroupBy" @property - def obj(self): + def obj(self) -> FrameOrSeries: # type: ignore[override] return self.groupby.obj @property @@ -227,32 +215,24 @@ def _from_selection(self) -> bool: self.groupby.key is not None or self.groupby.level is not None ) - def _convert_obj(self, obj): + def _convert_obj(self, obj: FrameOrSeries) -> FrameOrSeries: """ Provide any conversions for the object in order to correctly handle. Parameters ---------- - obj : the object to be resampled + obj : Series or DataFrame Returns ------- - obj : converted object + Series or DataFrame """ return obj._consolidate() def _get_binner_for_time(self): raise AbstractMethodError(self) - def _set_binner(self): - """ - Setup our binners. - - Cache these as we are an immutable object - """ - if self.binner is None: - self.binner, self.grouper = self._get_binner() - + @final def _get_binner(self): """ Create the BinGrouper, assume that self.set_grouper(obj) @@ -263,12 +243,6 @@ def _get_binner(self): bin_grouper = BinGrouper(bins, binlabels, indexer=self.groupby.indexer) return binner, bin_grouper - def _assure_grouper(self): - """ - Make sure that we are creating our binner & grouper. - """ - self._set_binner() - @Substitution( klass="Resampler", examples=""" @@ -358,7 +332,6 @@ def pipe( ) def aggregate(self, func, *args, **kwargs): - self._set_binner() result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg() if result is None: how = func @@ -409,7 +382,6 @@ def _gotitem(self, key, ndim: int, subset=None): subset : object, default None subset to act on """ - self._set_binner() grouper = self.grouper if subset is None: subset = self.obj @@ -426,7 +398,6 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs): Re-evaluate the obj with a groupby aggregation. """ if grouper is None: - self._set_binner() grouper = self.grouper obj = self._selected_obj @@ -1052,8 +1023,8 @@ def __init__(self, obj, parent=None, groupby=None, **kwargs): for attr in self._attributes: setattr(self, attr, kwargs.get(attr, getattr(parent, attr))) - # error: Too many arguments for "__init__" of "object" - super().__init__(None) # type: ignore[call-arg] + self.binner = parent.binner + self._groupby = groupby self._groupby.mutated = True self._groupby.grouper.mutated = True @@ -1139,7 +1110,6 @@ def _downsample(self, how, **kwargs): how : string / cython mapped function **kwargs : kw args passed to how function """ - self._set_binner() how = com.get_cython_func(how) or how ax = self.ax obj = self._selected_obj @@ -1156,7 +1126,7 @@ def _downsample(self, how, **kwargs): # error: Item "None" of "Optional[Any]" has no attribute "binlabels" if ( (ax.freq is not None or ax.inferred_freq is not None) - and len(self.grouper.binlabels) > len(ax) # type: ignore[union-attr] + and len(self.grouper.binlabels) > len(ax) and how is None ): @@ -1198,7 +1168,6 @@ def _upsample(self, method, limit=None, fill_value=None): .fillna: Fill NA/NaN values using the specified method. """ - self._set_binner() if self.axis: raise AssertionError("axis must be 0") if self._from_selection: @@ -1259,7 +1228,7 @@ def _get_binner_for_time(self): return super()._get_binner_for_time() return self.groupby._get_period_bins(self.ax) - def _convert_obj(self, obj): + def _convert_obj(self, obj: FrameOrSeries) -> FrameOrSeries: obj = super()._convert_obj(obj) if self._from_selection: @@ -1338,7 +1307,6 @@ def _upsample(self, method, limit=None, fill_value=None): if self.kind == "timestamp": return super()._upsample(method, limit=limit, fill_value=fill_value) - self._set_binner() ax = self.ax obj = self.obj new_index = self.binner @@ -1513,20 +1481,20 @@ def __init__( else: try: self.origin = Timestamp(origin) - except Exception as e: + except (ValueError, TypeError) as err: raise ValueError( "'origin' should be equal to 'epoch', 'start', 'start_day', " "'end', 'end_day' or " f"should be a Timestamp convertible type. Got '{origin}' instead." - ) from e + ) from err try: self.offset = Timedelta(offset) if offset is not None else None - except Exception as e: + except (ValueError, TypeError) as err: raise ValueError( "'offset' should be a Timedelta convertible type. " f"Got '{offset}' instead." - ) from e + ) from err # always sort time groupers kwargs["sort"] = True @@ -1587,10 +1555,9 @@ def _get_resampler(self, obj, kind=None): def _get_grouper(self, obj, validate: bool = True): # create the resampler and return our binner r = self._get_resampler(obj) - r._set_binner() return r.binner, r.grouper, r.obj - def _get_time_bins(self, ax): + def _get_time_bins(self, ax: DatetimeIndex): if not isinstance(ax, DatetimeIndex): raise TypeError( "axis must be a DatetimeIndex, but got " @@ -1966,13 +1933,13 @@ def _insert_nat_bin( def _adjust_dates_anchored( - first, - last, - freq, + first: Timestamp, + last: Timestamp, + freq: Tick, closed: Literal["right", "left"] = "right", origin="start_day", offset: Timedelta | None = None, -): +) -> tuple[Timestamp, Timestamp]: # First and last offsets should be calculated from the start day to fix an # error cause by resampling across multiple days when a one day period is # not a multiple of the frequency. See GH 8683 From 431a4a090d8293f7e7c6b757983628d22e0b72b6 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 30 Apr 2021 15:37:59 -0700 Subject: [PATCH 2/3] remove redundant setattr --- pandas/core/resample.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 650d98e94c05e..57aba724167f7 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -148,7 +148,6 @@ def __init__( self.group_keys = True self.as_index = True self.exclusions = set() - self.binner = None self.groupby._set_grouper(self._convert_obj(obj), sort=True) self.binner, self.grouper = self._get_binner() From bcb745f9b652d602dcb8243df2b9706b3bdc40ab Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 30 Apr 2021 18:44:37 -0700 Subject: [PATCH 3/3] mypy fixup --- pandas/core/resample.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index a6dbe1377b254..213c20294025d 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -6,6 +6,7 @@ from typing import ( TYPE_CHECKING, Callable, + Hashable, no_type_check, ) @@ -117,6 +118,7 @@ class Resampler(BaseGroupBy, PandasObject): """ grouper: BinGrouper + exclusions: frozenset[Hashable] = frozenset() # for SelectionMixin compat # to the groupby descriptor _attributes = [ @@ -147,7 +149,6 @@ def __init__( self.squeeze = False self.group_keys = True self.as_index = True - self.exclusions = set() self.groupby._set_grouper(self._convert_obj(obj), sort=True) self.binner, self.grouper = self._get_binner() @@ -188,7 +189,9 @@ def __getattr__(self, attr: str): # error: Signature of "obj" incompatible with supertype "BaseGroupBy" @property def obj(self) -> FrameOrSeries: # type: ignore[override] - return self.groupby.obj + # error: Incompatible return value type (got "Optional[Any]", + # expected "FrameOrSeries") + return self.groupby.obj # type: ignore[return-value] @property def ax(self): @@ -1316,9 +1319,7 @@ def _upsample(self, method, limit=None, fill_value=None): new_obj = _take_new_index( obj, indexer, - # error: Argument 3 to "_take_new_index" has incompatible type - # "Optional[Any]"; expected "Index" - new_index, # type: ignore[arg-type] + new_index, axis=self.axis, ) return self._wrap_result(new_obj)