Skip to content

Commit 030d63b

Browse files
authored
REF: de-lazify Resampler init (#41245)
1 parent 387d611 commit 030d63b

File tree

2 files changed

+40
-81
lines changed

2 files changed

+40
-81
lines changed

pandas/core/groupby/groupby.py

+2 -10
Original file line number | Diff line number | Diff line change
@@ -546,6 +546,7 @@ class BaseGroupBy(PandasObject, SelectionMixin, Generic[FrameOrSeries]):
546546
axis: int
547547
grouper: ops.BaseGrouper
548548
obj: FrameOrSeries
549+
group_keys: bool
549550

550551
@final
551552
def __len__(self) -> int:
@@ -556,26 +557,17 @@ def __repr__(self) -> str:
556557
# TODO: Better repr for GroupBy object
557558
return object.__repr__(self)
558559

559-
def _assure_grouper(self) -> None:
560-
"""
561-
We create the grouper on instantiation sub-classes may have a
562-
different policy.
563-
"""
564-
pass
565-
566560
@final
567561
@property
568562
def groups(self) -> dict[Hashable, np.ndarray]:
569563
"""
570564
Dict {group name -> group labels}.
571565
"""
572-
self._assure_grouper()
573566
return self.grouper.groups
574567

575568
@final
576569
@property
577570
def ngroups(self) -> int:
578-
self._assure_grouper()
579571
return self.grouper.ngroups
580572

581573
@final
@@ -584,7 +576,6 @@ def indices(self):
584576
"""
585577
Dict {group name -> group indices}.
586578
"""
587-
self._assure_grouper()
588579
return self.grouper.indices
589580

590581
@final
@@ -721,6 +712,7 @@ def get_group(self, name, obj=None):
721712

722713
return obj._take_with_is_copy(inds, axis=self.axis)
723714

715+
@final
724716
def __iter__(self) -> Iterator[tuple[Hashable, FrameOrSeries]]:
725717
"""
726718
Groupby iterator.

pandas/core/resample.py

+38 -71
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
from typing import (
77
TYPE_CHECKING,
88
Callable,
9+
Hashable,
910
no_type_check,
1011
)
1112

@@ -101,8 +102,8 @@ class Resampler(BaseGroupBy, PandasObject):
101102
102103
Parameters
103104
----------
104-
obj : pandas object
105-
groupby : a TimeGrouper object
105+
obj : Series or DataFrame
106+
groupby : TimeGrouper
106107
axis : int, default 0
107108
kind : str or None
108109
'period', 'timestamp' to override default index treatment
@@ -116,10 +117,8 @@ class Resampler(BaseGroupBy, PandasObject):
116117
After resampling, see aggregate, apply, and transform functions.
117118
"""
118119

119-
# error: Incompatible types in assignment (expression has type
120-
# "Optional[BinGrouper]", base class "BaseGroupBy" defined the type as
121-
# "BaseGrouper")
122-
grouper: BinGrouper | None # type: ignore[assignment]
120+
grouper: BinGrouper
121+
exclusions: frozenset[Hashable] = frozenset() # for SelectionMixin compat
123122

124123
# to the groupby descriptor
125124
_attributes = [
@@ -134,7 +133,14 @@ class Resampler(BaseGroupBy, PandasObject):
134133
"offset",
135134
]
136135

137-
def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs):
136+
def __init__(
137+
self,
138+
obj: FrameOrSeries,
139+
groupby: TimeGrouper,
140+
axis: int = 0,
141+
kind=None,
142+
**kwargs,
143+
):
138144
self.groupby = groupby
139145
self.keys = None
140146
self.sort = True
@@ -143,12 +149,9 @@ def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs):
143149
self.squeeze = False
144150
self.group_keys = True
145151
self.as_index = True
146-
self.exclusions = set()
147-
self.binner = None
148-
self.grouper = None
149152

150-
if self.groupby is not None:
151-
self.groupby._set_grouper(self._convert_obj(obj), sort=True)
153+
self.groupby._set_grouper(self._convert_obj(obj), sort=True)
154+
self.binner, self.grouper = self._get_binner()
152155

153156
@final
154157
def _shallow_copy(self, obj, **kwargs):
@@ -183,25 +186,12 @@ def __getattr__(self, attr: str):
183186

184187
return object.__getattribute__(self, attr)
185188

186-
def __iter__(self):
187-
"""
188-
Resampler iterator.
189-
190-
Returns
191-
-------
192-
Generator yielding sequence of (name, subsetted object)
193-
for each group.
194-
195-
See Also
196-
--------
197-
GroupBy.__iter__ : Generator yielding sequence for each group.
198-
"""
199-
self._set_binner()
200-
return super().__iter__()
201-
189+
# error: Signature of "obj" incompatible with supertype "BaseGroupBy"
202190
@property
203-
def obj(self):
204-
return self.groupby.obj
191+
def obj(self) -> FrameOrSeries: # type: ignore[override]
192+
# error: Incompatible return value type (got "Optional[Any]",
193+
# expected "FrameOrSeries")
194+
return self.groupby.obj # type: ignore[return-value]
205195

206196
@property
207197
def ax(self):
@@ -218,32 +208,24 @@ def _from_selection(self) -> bool:
218208
self.groupby.key is not None or self.groupby.level is not None
219209
)
220210

221-
def _convert_obj(self, obj):
211+
def _convert_obj(self, obj: FrameOrSeries) -> FrameOrSeries:
222212
"""
223213
Provide any conversions for the object in order to correctly handle.
224214
225215
Parameters
226216
----------
227-
obj : the object to be resampled
217+
obj : Series or DataFrame
228218
229219
Returns
230220
-------
231-
obj : converted object
221+
Series or DataFrame
232222
"""
233223
return obj._consolidate()
234224

235225
def _get_binner_for_time(self):
236226
raise AbstractMethodError(self)
237227

238-
def _set_binner(self):
239-
"""
240-
Setup our binners.
241-
242-
Cache these as we are an immutable object
243-
"""
244-
if self.binner is None:
245-
self.binner, self.grouper = self._get_binner()
246-
228+
@final
247229
def _get_binner(self):
248230
"""
249231
Create the BinGrouper, assume that self.set_grouper(obj)
@@ -254,12 +236,6 @@ def _get_binner(self):
254236
bin_grouper = BinGrouper(bins, binlabels, indexer=self.groupby.indexer)
255237
return binner, bin_grouper
256238

257-
def _assure_grouper(self):
258-
"""
259-
Make sure that we are creating our binner & grouper.
260-
"""
261-
self._set_binner()
262-
263239
@Substitution(
264240
klass="Resampler",
265241
examples="""
@@ -349,7 +325,6 @@ def pipe(
349325
)
350326
def aggregate(self, func, *args, **kwargs):
351327

352-
self._set_binner()
353328
result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
354329
if result is None:
355330
how = func
@@ -400,7 +375,6 @@ def _gotitem(self, key, ndim: int, subset=None):
400375
subset : object, default None
401376
subset to act on
402377
"""
403-
self._set_binner()
404378
grouper = self.grouper
405379
if subset is None:
406380
subset = self.obj
@@ -417,7 +391,6 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
417391
Re-evaluate the obj with a groupby aggregation.
418392
"""
419393
if grouper is None:
420-
self._set_binner()
421394
grouper = self.grouper
422395

423396
obj = self._selected_obj
@@ -1050,8 +1023,8 @@ def __init__(self, obj, parent=None, groupby=None, **kwargs):
10501023
for attr in self._attributes:
10511024
setattr(self, attr, kwargs.get(attr, getattr(parent, attr)))
10521025

1053-
# error: Too many arguments for "__init__" of "object"
1054-
super().__init__(None) # type: ignore[call-arg]
1026+
self.binner = parent.binner
1027+
10551028
self._groupby = groupby
10561029
self._groupby.mutated = True
10571030
self._groupby.grouper.mutated = True
@@ -1137,7 +1110,6 @@ def _downsample(self, how, **kwargs):
11371110
how : string / cython mapped function
11381111
**kwargs : kw args passed to how function
11391112
"""
1140-
self._set_binner()
11411113
how = com.get_cython_func(how) or how
11421114
ax = self.ax
11431115
obj = self._selected_obj
@@ -1154,7 +1126,7 @@ def _downsample(self, how, **kwargs):
11541126
# error: Item "None" of "Optional[Any]" has no attribute "binlabels"
11551127
if (
11561128
(ax.freq is not None or ax.inferred_freq is not None)
1157-
and len(self.grouper.binlabels) > len(ax) # type: ignore[union-attr]
1129+
and len(self.grouper.binlabels) > len(ax)
11581130
and how is None
11591131
):
11601132

@@ -1196,7 +1168,6 @@ def _upsample(self, method, limit=None, fill_value=None):
11961168
.fillna: Fill NA/NaN values using the specified method.
11971169
11981170
"""
1199-
self._set_binner()
12001171
if self.axis:
12011172
raise AssertionError("axis must be 0")
12021173
if self._from_selection:
@@ -1257,7 +1228,7 @@ def _get_binner_for_time(self):
12571228
return super()._get_binner_for_time()
12581229
return self.groupby._get_period_bins(self.ax)
12591230

1260-
def _convert_obj(self, obj):
1231+
def _convert_obj(self, obj: FrameOrSeries) -> FrameOrSeries:
12611232
obj = super()._convert_obj(obj)
12621233

12631234
if self._from_selection:
@@ -1336,7 +1307,6 @@ def _upsample(self, method, limit=None, fill_value=None):
13361307
if self.kind == "timestamp":
13371308
return super()._upsample(method, limit=limit, fill_value=fill_value)
13381309

1339-
self._set_binner()
13401310
ax = self.ax
13411311
obj = self.obj
13421312
new_index = self.binner
@@ -1349,9 +1319,7 @@ def _upsample(self, method, limit=None, fill_value=None):
13491319
new_obj = _take_new_index(
13501320
obj,
13511321
indexer,
1352-
# error: Argument 3 to "_take_new_index" has incompatible type
1353-
# "Optional[Any]"; expected "Index"
1354-
new_index, # type: ignore[arg-type]
1322+
new_index,
13551323
axis=self.axis,
13561324
)
13571325
return self._wrap_result(new_obj)
@@ -1511,20 +1479,20 @@ def __init__(
15111479
else:
15121480
try:
15131481
self.origin = Timestamp(origin)
1514-
except Exception as e:
1482+
except (ValueError, TypeError) as err:
15151483
raise ValueError(
15161484
"'origin' should be equal to 'epoch', 'start', 'start_day', "
15171485
"'end', 'end_day' or "
15181486
f"should be a Timestamp convertible type. Got '{origin}' instead."
1519-
) from e
1487+
) from err
15201488

15211489
try:
15221490
self.offset = Timedelta(offset) if offset is not None else None
1523-
except Exception as e:
1491+
except (ValueError, TypeError) as err:
15241492
raise ValueError(
15251493
"'offset' should be a Timedelta convertible type. "
15261494
f"Got '{offset}' instead."
1527-
) from e
1495+
) from err
15281496

15291497
# always sort time groupers
15301498
kwargs["sort"] = True
@@ -1585,10 +1553,9 @@ def _get_resampler(self, obj, kind=None):
15851553
def _get_grouper(self, obj, validate: bool = True):
15861554
# create the resampler and return our binner
15871555
r = self._get_resampler(obj)
1588-
r._set_binner()
15891556
return r.binner, r.grouper, r.obj
15901557

1591-
def _get_time_bins(self, ax):
1558+
def _get_time_bins(self, ax: DatetimeIndex):
15921559
if not isinstance(ax, DatetimeIndex):
15931560
raise TypeError(
15941561
"axis must be a DatetimeIndex, but got "
@@ -1964,13 +1931,13 @@ def _insert_nat_bin(
19641931

19651932

19661933
def _adjust_dates_anchored(
1967-
first,
1968-
last,
1969-
freq,
1934+
first: Timestamp,
1935+
last: Timestamp,
1936+
freq: Tick,
19701937
closed: Literal["right", "left"] = "right",
19711938
origin="start_day",
19721939
offset: Timedelta | None = None,
1973-
):
1940+
) -> tuple[Timestamp, Timestamp]:
19741941
# First and last offsets should be calculated from the start day to fix an
19751942
# error caused by resampling across multiple days when a one day period is
19761943
# not a multiple of the frequency. See GH 8683

0 commit comments

Comments
 (0)