Skip to content

Commit b694b09

Browse files
committed
Removed block code
1 parent 88a6ee1 commit b694b09

File tree

1 file changed

+14
-118
lines changed

1 file changed

+14
-118
lines changed

pandas/core/groupby/generic.py

+14 -118
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,6 @@
5656
)
5757
from pandas.core.index import Index, MultiIndex, _all_indexes_same
5858
import pandas.core.indexes.base as ibase
59-
from pandas.core.internals import BlockManager, make_block
6059
from pandas.core.series import Series
6160

6261
from pandas.plotting import boxplot_frame_groupby
@@ -147,93 +146,6 @@ def _iterate_slices(self):
147146
continue
148147
yield val, slicer(val)
149148

150-
def _cython_agg_general(self, how, alt=None, numeric_only=True, min_count=-1):
151-
new_items, new_blocks = self._cython_agg_blocks(
152-
how, alt=alt, numeric_only=numeric_only, min_count=min_count
153-
)
154-
return self._wrap_agged_blocks(new_items, new_blocks)
155-
156-
_block_agg_axis = 0
157-
158-
def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1):
159-
# TODO: the actual managing of mgr_locs is a PITA
160-
# here, it should happen via BlockManager.combine
161-
162-
data, agg_axis = self._get_data_to_aggregate()
163-
164-
if numeric_only:
165-
data = data.get_numeric_data(copy=False)
166-
167-
new_blocks = []
168-
new_items = []
169-
deleted_items = []
170-
no_result = object()
171-
for block in data.blocks:
172-
# Avoid inheriting result from earlier in the loop
173-
result = no_result
174-
locs = block.mgr_locs.as_array
175-
try:
176-
result, _ = self.grouper.aggregate(
177-
block.values, how, axis=agg_axis, min_count=min_count
178-
)
179-
except NotImplementedError:
180-
# generally if we have numeric_only=False
181-
# and non-applicable functions
182-
# try to python agg
183-
184-
if alt is None:
185-
# we cannot perform the operation
186-
# in an alternate way, exclude the block
187-
deleted_items.append(locs)
188-
continue
189-
190-
# call our grouper again with only this block
191-
obj = self.obj[data.items[locs]]
192-
s = groupby(obj, self.grouper)
193-
try:
194-
result = s.aggregate(lambda x: alt(x, axis=self.axis))
195-
except TypeError:
196-
# we may have an exception in trying to aggregate
197-
# continue and exclude the block
198-
deleted_items.append(locs)
199-
continue
200-
finally:
201-
if result is not no_result:
202-
# see if we can cast the block back to the original dtype
203-
result = maybe_downcast_numeric(result, block.dtype)
204-
newb = block.make_block(result)
205-
206-
new_items.append(locs)
207-
new_blocks.append(newb)
208-
209-
if len(new_blocks) == 0:
210-
raise DataError("No numeric types to aggregate")
211-
212-
# reset the locs in the blocks to correspond to our
213-
# current ordering
214-
indexer = np.concatenate(new_items)
215-
new_items = data.items.take(np.sort(indexer))
216-
217-
if len(deleted_items):
218-
219-
# we need to adjust the indexer to account for the
220-
# items we have removed
221-
# really should be done in internals :<
222-
223-
deleted = np.concatenate(deleted_items)
224-
ai = np.arange(len(data))
225-
mask = np.zeros(len(data))
226-
mask[deleted] = 1
227-
indexer = (ai - mask.cumsum())[indexer]
228-
229-
offset = 0
230-
for b in new_blocks:
231-
loc = len(b.mgr_locs)
232-
b.mgr_locs = indexer[offset : (offset + loc)]
233-
offset += loc
234-
235-
return new_items, new_blocks
236-
237149
def aggregate(self, func, *args, **kwargs):
238150
_level = kwargs.pop("_level", None)
239151

@@ -1385,7 +1297,6 @@ class DataFrameGroupBy(NDFrameGroupBy):
13851297

13861298
_apply_whitelist = base.dataframe_apply_whitelist
13871299

1388-
_block_agg_axis = 1
13891300

13901301
_agg_see_also_doc = dedent(
13911302
"""
@@ -1571,24 +1482,6 @@ def _wrap_aggregated_output(self, output, names=None):
15711482
def _wrap_transformed_output(self, output, names=None):
15721483
return DataFrame(output, index=self.obj.index)
15731484

1574-
def _wrap_agged_blocks(self, items, blocks):
1575-
if not self.as_index:
1576-
index = np.arange(blocks[0].values.shape[-1])
1577-
mgr = BlockManager(blocks, [items, index])
1578-
result = DataFrame(mgr)
1579-
1580-
self._insert_inaxis_grouper_inplace(result)
1581-
result = result._consolidate()
1582-
else:
1583-
index = self.grouper.result_index
1584-
mgr = BlockManager(blocks, [items, index])
1585-
result = DataFrame(mgr)
1586-
1587-
if self.axis == 1:
1588-
result = result.T
1589-
1590-
return self._reindex_output(result)._convert(datetime=True)
1591-
15921485
def _iterate_column_groupbys(self):
15931486
for i, colname in enumerate(self._selected_obj.columns):
15941487
yield colname, SeriesGroupBy(
@@ -1616,20 +1509,23 @@ def count(self):
16161509
DataFrame
16171510
Count of values within each group.
16181511
"""
1619-
data, _ = self._get_data_to_aggregate()
1620-
ids, _, ngroups = self.grouper.group_info
1621-
mask = ids != -1
1512+
obj = self._selected_obj
16221513

1623-
val = (
1624-
(mask & ~_isna_ndarraylike(np.atleast_2d(blk.get_values())))
1625-
for blk in data.blocks
1626-
)
1627-
loc = (blk.mgr_locs for blk in data.blocks)
1514+
def groupby_series(obj, col=None):
1515+
return SeriesGroupBy(obj, selection=col, grouper=self.grouper).count()
1516+
1517+
if isinstance(obj, Series):
1518+
results = groupby_series(obj)
1519+
else:
1520+
from pandas.core.reshape.concat import concat
16281521

1629-
counter = partial(lib.count_level_2d, labels=ids, max_bin=ngroups, axis=1)
1630-
blk = map(make_block, map(counter, val), loc)
1522+
results = [groupby_series(obj[col], col) for col in obj.columns]
1523+
results = concat(results, axis=1)
1524+
results.columns.names = obj.columns.names
16311525

1632-
return self._wrap_agged_blocks(data.items, list(blk))
1526+
if not self.as_index:
1527+
results.index = ibase.default_index(len(results))
1528+
return results
16331529

16341530
def nunique(self, dropna=True):
16351531
"""

0 commit comments

Comments
 (0)