|
73 | 73 | Generator,
|
74 | 74 | Hashable,
|
75 | 75 | Iterator,
|
76 |
| - Sequence, |
77 | 76 | )
|
78 | 77 |
|
79 | 78 | from pandas.core.generic import NDFrame
|
@@ -581,25 +580,21 @@ class BaseGrouper:
|
581 | 580 | def __init__(
|
582 | 581 | self,
|
583 | 582 | axis: Index,
|
584 |
| - groupings: Sequence[grouper.Grouping], |
| 583 | + groupings: list[grouper.Grouping], |
585 | 584 | sort: bool = True,
|
586 | 585 | dropna: bool = True,
|
587 | 586 | ) -> None:
|
588 | 587 | assert isinstance(axis, Index), axis
|
589 | 588 |
|
590 | 589 | self.axis = axis
|
591 |
| - self._groupings: list[grouper.Grouping] = list(groupings) |
| 590 | + self._groupings = groupings |
592 | 591 | self._sort = sort
|
593 | 592 | self.dropna = dropna
|
594 | 593 |
|
595 | 594 | @property
|
596 | 595 | def groupings(self) -> list[grouper.Grouping]:
|
597 | 596 | return self._groupings
|
598 | 597 |
|
599 |
| - @property |
600 |
| - def shape(self) -> Shape: |
601 |
| - return tuple(ping.ngroups for ping in self.groupings) |
602 |
| - |
603 | 598 | def __iter__(self) -> Iterator[Hashable]:
|
604 | 599 | return iter(self.indices)
|
605 | 600 |
|
@@ -628,11 +623,15 @@ def _get_splitter(self, data: NDFrame) -> DataSplitter:
|
628 | 623 | -------
|
629 | 624 | Generator yielding subsetted objects
|
630 | 625 | """
|
631 |
| - ids, ngroups = self.group_info |
632 |
| - return _get_splitter( |
| 626 | + if isinstance(data, Series): |
| 627 | + klass: type[DataSplitter] = SeriesSplitter |
| 628 | + else: |
| 629 | + # i.e. DataFrame |
| 630 | + klass = FrameSplitter |
| 631 | + |
| 632 | + return klass( |
633 | 633 | data,
|
634 |
| - ids, |
635 |
| - ngroups, |
| 634 | + self.ngroups, |
636 | 635 | sorted_ids=self._sorted_ids,
|
637 | 636 | sort_idx=self.result_ilocs,
|
638 | 637 | )
|
@@ -692,7 +691,8 @@ def size(self) -> Series:
|
692 | 691 | """
|
693 | 692 | Compute group sizes.
|
694 | 693 | """
|
695 |
| - ids, ngroups = self.group_info |
| 694 | + ids = self.ids |
| 695 | + ngroups = self.ngroups |
696 | 696 | out: np.ndarray | list
|
697 | 697 | if ngroups:
|
698 | 698 | out = np.bincount(ids[ids != -1], minlength=ngroups)
|
@@ -729,12 +729,6 @@ def has_dropped_na(self) -> bool:
|
729 | 729 | """
|
730 | 730 | return bool((self.ids < 0).any())
|
731 | 731 |
|
732 |
| - @cache_readonly |
733 |
| - def group_info(self) -> tuple[npt.NDArray[np.intp], int]: |
734 |
| - result_index, ids = self.result_index_and_ids |
735 |
| - ngroups = len(result_index) |
736 |
| - return ids, ngroups |
737 |
| - |
738 | 732 | @cache_readonly
|
739 | 733 | def codes_info(self) -> npt.NDArray[np.intp]:
|
740 | 734 | # return the codes of items in original grouped axis
|
@@ -1123,10 +1117,6 @@ def indices(self):
|
1123 | 1117 | i = bin
|
1124 | 1118 | return indices
|
1125 | 1119 |
|
1126 |
| - @cache_readonly |
1127 |
| - def group_info(self) -> tuple[npt.NDArray[np.intp], int]: |
1128 |
| - return self.ids, self.ngroups |
1129 |
| - |
1130 | 1120 | @cache_readonly
|
1131 | 1121 | def codes(self) -> list[npt.NDArray[np.intp]]:
|
1132 | 1122 | return [self.ids]
|
@@ -1191,29 +1181,25 @@ class DataSplitter(Generic[NDFrameT]):
|
1191 | 1181 | def __init__(
|
1192 | 1182 | self,
|
1193 | 1183 | data: NDFrameT,
|
1194 |
| - labels: npt.NDArray[np.intp], |
1195 | 1184 | ngroups: int,
|
1196 | 1185 | *,
|
1197 | 1186 | sort_idx: npt.NDArray[np.intp],
|
1198 | 1187 | sorted_ids: npt.NDArray[np.intp],
|
1199 | 1188 | ) -> None:
|
1200 | 1189 | self.data = data
|
1201 |
| - self.labels = ensure_platform_int(labels) # _should_ already be np.intp |
1202 | 1190 | self.ngroups = ngroups
|
1203 | 1191 |
|
1204 | 1192 | self._slabels = sorted_ids
|
1205 | 1193 | self._sort_idx = sort_idx
|
1206 | 1194 |
|
1207 | 1195 | def __iter__(self) -> Iterator:
|
1208 |
| - sdata = self._sorted_data |
1209 |
| - |
1210 | 1196 | if self.ngroups == 0:
|
1211 | 1197 | # we are inside a generator, rather than raise StopIteration
|
1212 | 1198 | # we merely return signal the end
|
1213 | 1199 | return
|
1214 | 1200 |
|
1215 | 1201 | starts, ends = lib.generate_slices(self._slabels, self.ngroups)
|
1216 |
| - |
| 1202 | + sdata = self._sorted_data |
1217 | 1203 | for start, end in zip(starts, ends):
|
1218 | 1204 | yield self._chop(sdata, slice(start, end))
|
1219 | 1205 |
|
@@ -1241,20 +1227,3 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
|
1241 | 1227 | mgr = sdata._mgr.get_slice(slice_obj, axis=1)
|
1242 | 1228 | df = sdata._constructor_from_mgr(mgr, axes=mgr.axes)
|
1243 | 1229 | return df.__finalize__(sdata, method="groupby")
|
1244 |
| - |
1245 |
| - |
1246 |
| -def _get_splitter( |
1247 |
| - data: NDFrame, |
1248 |
| - labels: npt.NDArray[np.intp], |
1249 |
| - ngroups: int, |
1250 |
| - *, |
1251 |
| - sort_idx: npt.NDArray[np.intp], |
1252 |
| - sorted_ids: npt.NDArray[np.intp], |
1253 |
| -) -> DataSplitter: |
1254 |
| - if isinstance(data, Series): |
1255 |
| - klass: type[DataSplitter] = SeriesSplitter |
1256 |
| - else: |
1257 |
| - # i.e. DataFrame |
1258 |
| - klass = FrameSplitter |
1259 |
| - |
1260 |
| - return klass(data, labels, ngroups, sort_idx=sort_idx, sorted_ids=sorted_ids) |
0 commit comments