Skip to content

Commit 57f418e

Browse files
committed
Removed block management
1 parent 6b3ba98 commit 57f418e

File tree

1 file changed

+0
-120
lines changed

1 file changed

+0
-120
lines changed

pandas/core/groupby/generic.py

-120
Original file line numberDiff line numberDiff line change
@@ -979,126 +979,6 @@ def _iterate_slices(self) -> Iterable[Series]:
979979

980980
yield values
981981

982-
def _cython_agg_general(
983-
self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
984-
):
985-
new_items, new_blocks = self._cython_agg_blocks(
986-
how, alt=alt, numeric_only=numeric_only, min_count=min_count
987-
)
988-
return self._wrap_agged_blocks(new_items, new_blocks)
989-
990-
def _cython_agg_blocks(
    self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
):
    """
    Aggregate the data block by block and collect the resulting blocks.

    Each block of the data returned by ``_get_data_to_aggregate`` is passed
    to ``self.grouper.aggregate``.  If that raises ``NotImplementedError``
    and ``alt`` is given, the block's columns are re-grouped and aggregated
    with ``alt`` instead.  Blocks that cannot be aggregated either way are
    dropped, and the placements of the surviving blocks are renumbered to
    account for the dropped ones.

    Parameters
    ----------
    how : str
        Name of the aggregation, passed to ``self.grouper.aggregate``.
    alt : callable, optional
        Fallback used when ``self.grouper.aggregate`` raises
        ``NotImplementedError``; called as ``alt(x, axis=self.axis)``.
    numeric_only : bool, default True
        If True, restrict the data via ``get_numeric_data(copy=False)``.
    min_count : int, default -1
        Passed through to ``self.grouper.aggregate``.

    Returns
    -------
    new_items
        ``data.items`` taken at the sorted locations of the kept blocks.
    new_blocks : list
        The aggregated blocks, with ``mgr_locs`` reset to the new ordering.

    Raises
    ------
    DataError
        If no block could be aggregated.
    """
    # TODO: the actual managing of mgr_locs is a PITA
    # here, it should happen via BlockManager.combine

    data = self._get_data_to_aggregate()

    if numeric_only:
        data = data.get_numeric_data(copy=False)

    new_blocks = []
    new_items = []
    deleted_items = []
    # Sentinel distinguishing "no result produced" from any real result.
    no_result = object()
    for block in data.blocks:
        # Avoid inheriting result from earlier in the loop
        result = no_result
        locs = block.mgr_locs.as_array
        try:
            result, _ = self.grouper.aggregate(
                block.values, how, axis=1, min_count=min_count
            )
        except NotImplementedError:
            # generally if we have numeric_only=False
            # and non-applicable functions
            # try to python agg

            if alt is None:
                # we cannot perform the operation
                # in an alternate way, exclude the block
                assert how == "ohlc"
                deleted_items.append(locs)
                continue

            # call our grouper again with only this block
            obj = self.obj[data.items[locs]]
            if obj.shape[1] == 1:
                # Avoid call to self.values that can occur in DataFrame
                # reductions; see GH#28949
                obj = obj.iloc[:, 0]

            s = get_groupby(obj, self.grouper)
            try:
                result = s.aggregate(lambda x: alt(x, axis=self.axis))
            except TypeError:
                # we may have an exception in trying to aggregate
                # continue and exclude the block
                deleted_items.append(locs)
                continue
            else:
                result = cast(DataFrame, result)
                # unwrap DataFrame to get array
                assert len(result._data.blocks) == 1
                result = result._data.blocks[0].values
                if isinstance(result, np.ndarray) and result.ndim == 1:
                    result = result.reshape(1, -1)

        finally:
            # Runs on every exit from the try statement, including the
            # ``continue`` paths above; those leave ``result`` as the
            # sentinel, so only the assertion below executes for them.
            assert not isinstance(result, DataFrame)

            if result is not no_result:
                # see if we can cast the block back to the original dtype
                result = maybe_downcast_numeric(result, block.dtype)

                if block.is_extension and isinstance(result, np.ndarray):
                    # e.g. block.values was an IntegerArray
                    # (1, N) case can occur if block.values was Categorical
                    # and result is ndarray[object]
                    assert result.ndim == 1 or result.shape[0] == 1
                    try:
                        # Cast back if feasible
                        result = type(block.values)._from_sequence(
                            result.ravel(), dtype=block.values.dtype
                        )
                    except ValueError:
                        # reshape to be valid for non-Extension Block
                        result = result.reshape(1, -1)

                newb = block.make_block(result)

                new_items.append(locs)
                new_blocks.append(newb)

    if not new_blocks:
        raise DataError("No numeric types to aggregate")

    # reset the locs in the blocks to correspond to our
    # current ordering
    indexer = np.concatenate(new_items)
    new_items = data.items.take(np.sort(indexer))

    if deleted_items:

        # we need to adjust the indexer to account for the
        # items we have removed
        # really should be done in internals :<

        deleted = np.concatenate(deleted_items)
        ai = np.arange(len(data))
        # Integer mask: a float mask would turn the adjusted indexer (and
        # therefore the block placements assigned below) into float64.
        mask = np.zeros(len(data), dtype=np.intp)
        mask[deleted] = 1
        # Shift every location down by the number of deleted items at or
        # before it.
        indexer = (ai - mask.cumsum())[indexer]

    # Hand each block its slice of the (possibly adjusted) indexer, in the
    # same order the blocks were collected.
    offset = 0
    for b in new_blocks:
        loc = len(b.mgr_locs)
        b.mgr_locs = indexer[offset : (offset + loc)]
        offset += loc

    return new_items, new_blocks
1101-
1102982
def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
1103983
if self.grouper.nkeys != 1:
1104984
raise AssertionError("Number of keys must be 1")

0 commit comments

Comments
 (0)