|
56 | 56 | )
|
57 | 57 | from pandas.core.index import Index, MultiIndex, _all_indexes_same
|
58 | 58 | import pandas.core.indexes.base as ibase
|
59 |
| -from pandas.core.internals import BlockManager, make_block |
60 | 59 | from pandas.core.series import Series
|
61 | 60 |
|
62 | 61 | from pandas.plotting import boxplot_frame_groupby
|
@@ -147,93 +146,6 @@ def _iterate_slices(self):
|
147 | 146 | continue
|
148 | 147 | yield val, slicer(val)
|
149 | 148 |
|
150 |
| - def _cython_agg_general(self, how, alt=None, numeric_only=True, min_count=-1): |
151 |
| - new_items, new_blocks = self._cython_agg_blocks( |
152 |
| - how, alt=alt, numeric_only=numeric_only, min_count=min_count |
153 |
| - ) |
154 |
| - return self._wrap_agged_blocks(new_items, new_blocks) |
155 |
| - |
156 |
| - _block_agg_axis = 0 |
157 |
| - |
158 |
| - def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1): |
159 |
| - # TODO: the actual managing of mgr_locs is a PITA |
160 |
| - # here, it should happen via BlockManager.combine |
161 |
| - |
162 |
| - data, agg_axis = self._get_data_to_aggregate() |
163 |
| - |
164 |
| - if numeric_only: |
165 |
| - data = data.get_numeric_data(copy=False) |
166 |
| - |
167 |
| - new_blocks = [] |
168 |
| - new_items = [] |
169 |
| - deleted_items = [] |
170 |
| - no_result = object() |
171 |
| - for block in data.blocks: |
172 |
| - # Avoid inheriting result from earlier in the loop |
173 |
| - result = no_result |
174 |
| - locs = block.mgr_locs.as_array |
175 |
| - try: |
176 |
| - result, _ = self.grouper.aggregate( |
177 |
| - block.values, how, axis=agg_axis, min_count=min_count |
178 |
| - ) |
179 |
| - except NotImplementedError: |
180 |
| - # generally if we have numeric_only=False |
181 |
| - # and non-applicable functions |
182 |
| - # try to python agg |
183 |
| - |
184 |
| - if alt is None: |
185 |
| - # we cannot perform the operation |
186 |
| - # in an alternate way, exclude the block |
187 |
| - deleted_items.append(locs) |
188 |
| - continue |
189 |
| - |
190 |
| - # call our grouper again with only this block |
191 |
| - obj = self.obj[data.items[locs]] |
192 |
| - s = groupby(obj, self.grouper) |
193 |
| - try: |
194 |
| - result = s.aggregate(lambda x: alt(x, axis=self.axis)) |
195 |
| - except TypeError: |
196 |
| - # we may have an exception in trying to aggregate |
197 |
| - # continue and exclude the block |
198 |
| - deleted_items.append(locs) |
199 |
| - continue |
200 |
| - finally: |
201 |
| - if result is not no_result: |
202 |
| - # see if we can cast the block back to the original dtype |
203 |
| - result = maybe_downcast_numeric(result, block.dtype) |
204 |
| - newb = block.make_block(result) |
205 |
| - |
206 |
| - new_items.append(locs) |
207 |
| - new_blocks.append(newb) |
208 |
| - |
209 |
| - if len(new_blocks) == 0: |
210 |
| - raise DataError("No numeric types to aggregate") |
211 |
| - |
212 |
| - # reset the locs in the blocks to correspond to our |
213 |
| - # current ordering |
214 |
| - indexer = np.concatenate(new_items) |
215 |
| - new_items = data.items.take(np.sort(indexer)) |
216 |
| - |
217 |
| - if len(deleted_items): |
218 |
| - |
219 |
| - # we need to adjust the indexer to account for the |
220 |
| - # items we have removed |
221 |
| - # really should be done in internals :< |
222 |
| - |
223 |
| - deleted = np.concatenate(deleted_items) |
224 |
| - ai = np.arange(len(data)) |
225 |
| - mask = np.zeros(len(data)) |
226 |
| - mask[deleted] = 1 |
227 |
| - indexer = (ai - mask.cumsum())[indexer] |
228 |
| - |
229 |
| - offset = 0 |
230 |
| - for b in new_blocks: |
231 |
| - loc = len(b.mgr_locs) |
232 |
| - b.mgr_locs = indexer[offset : (offset + loc)] |
233 |
| - offset += loc |
234 |
| - |
235 |
| - return new_items, new_blocks |
236 |
| - |
237 | 149 | def aggregate(self, func, *args, **kwargs):
|
238 | 150 | _level = kwargs.pop("_level", None)
|
239 | 151 |
|
@@ -1385,7 +1297,6 @@ class DataFrameGroupBy(NDFrameGroupBy):
|
1385 | 1297 |
|
1386 | 1298 | _apply_whitelist = base.dataframe_apply_whitelist
|
1387 | 1299 |
|
1388 |
| - _block_agg_axis = 1 |
1389 | 1300 |
|
1390 | 1301 | _agg_see_also_doc = dedent(
|
1391 | 1302 | """
|
@@ -1571,24 +1482,6 @@ def _wrap_aggregated_output(self, output, names=None):
|
1571 | 1482 | def _wrap_transformed_output(self, output, names=None):
|
1572 | 1483 | return DataFrame(output, index=self.obj.index)
|
1573 | 1484 |
|
1574 |
| - def _wrap_agged_blocks(self, items, blocks): |
1575 |
| - if not self.as_index: |
1576 |
| - index = np.arange(blocks[0].values.shape[-1]) |
1577 |
| - mgr = BlockManager(blocks, [items, index]) |
1578 |
| - result = DataFrame(mgr) |
1579 |
| - |
1580 |
| - self._insert_inaxis_grouper_inplace(result) |
1581 |
| - result = result._consolidate() |
1582 |
| - else: |
1583 |
| - index = self.grouper.result_index |
1584 |
| - mgr = BlockManager(blocks, [items, index]) |
1585 |
| - result = DataFrame(mgr) |
1586 |
| - |
1587 |
| - if self.axis == 1: |
1588 |
| - result = result.T |
1589 |
| - |
1590 |
| - return self._reindex_output(result)._convert(datetime=True) |
1591 |
| - |
1592 | 1485 | def _iterate_column_groupbys(self):
|
1593 | 1486 | for i, colname in enumerate(self._selected_obj.columns):
|
1594 | 1487 | yield colname, SeriesGroupBy(
|
@@ -1616,20 +1509,23 @@ def count(self):
|
1616 | 1509 | DataFrame
|
1617 | 1510 | Count of values within each group.
|
1618 | 1511 | """
|
1619 |
| - data, _ = self._get_data_to_aggregate() |
1620 |
| - ids, _, ngroups = self.grouper.group_info |
1621 |
| - mask = ids != -1 |
| 1512 | + obj = self._selected_obj |
1622 | 1513 |
|
1623 |
| - val = ( |
1624 |
| - (mask & ~_isna_ndarraylike(np.atleast_2d(blk.get_values()))) |
1625 |
| - for blk in data.blocks |
1626 |
| - ) |
1627 |
| - loc = (blk.mgr_locs for blk in data.blocks) |
| 1514 | + def groupby_series(obj, col=None): |
| 1515 | + return SeriesGroupBy(obj, selection=col, grouper=self.grouper).count() |
| 1516 | + |
| 1517 | + if isinstance(obj, Series): |
| 1518 | + results = groupby_series(obj) |
| 1519 | + else: |
| 1520 | + from pandas.core.reshape.concat import concat |
1628 | 1521 |
|
1629 |
| - counter = partial(lib.count_level_2d, labels=ids, max_bin=ngroups, axis=1) |
1630 |
| - blk = map(make_block, map(counter, val), loc) |
| 1522 | + results = [groupby_series(obj[col], col) for col in obj.columns] |
| 1523 | + results = concat(results, axis=1) |
| 1524 | + results.columns.names = obj.columns.names |
1631 | 1525 |
|
1632 |
| - return self._wrap_agged_blocks(data.items, list(blk)) |
| 1526 | + if not self.as_index: |
| 1527 | + results.index = ibase.default_index(len(results)) |
| 1528 | + return results |
1633 | 1529 |
|
1634 | 1530 | def nunique(self, dropna=True):
|
1635 | 1531 | """
|
|
0 commit comments