@@ -82,7 +82,7 @@
 
 _series_apply_whitelist = \
     (_common_apply_whitelist - set(['boxplot'])) | \
-    frozenset(['dtype', 'value_counts', 'unique', 'nlargest', 'nsmallest'])
+    frozenset(['dtype', 'unique', 'nlargest', 'nsmallest'])
 
 _dataframe_apply_whitelist = \
     _common_apply_whitelist | frozenset(['dtypes', 'corrwith'])
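The hunk above removes 'value_counts' from _series_apply_whitelist because the hunk below adds a dedicated SeriesGroupBy.value_counts, so the call no longer needs to be dispatched through the generic apply machinery. As a rough usage sketch of what the new method returns (the frame, column names, and printed layout are illustrative assumptions, not part of the patch):

    import pandas as pd

    # hypothetical example data
    df = pd.DataFrame({'key': ['a', 'a', 'a', 'b', 'b'],
                       'val': [1, 1, 2, 2, 2]})

    # per-group counts of each value, as a Series indexed by (key, val)
    print(df.groupby('key')['val'].value_counts())
    # key  val
    # a    1      2
    #      2      1
    # b    2      2
    # (exact display varies across pandas versions)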
@@ -2583,6 +2583,108 @@ def nunique(self, dropna=True):
                       index=self.grouper.result_index,
                       name=self.name)
 
+    def value_counts(self, normalize=False, sort=True, ascending=False,
+                     bins=None, dropna=True):
+
+        from functools import partial
+        from pandas.tools.tile import cut
+        from pandas.tools.merge import _get_join_indexers
+
+        if bins is not None and not np.iterable(bins):
+            # scalar bins cannot be done at top level
+            # in a backward compatible way
+            return self.apply(Series.value_counts,
+                              normalize=normalize,
+                              sort=sort,
+                              ascending=ascending,
+                              bins=bins)
+
+        ids, _, _ = self.grouper.group_info
+        val = self.obj.get_values()
+
+        # groupby removes null keys from groupings
+        mask = ids != -1
+        ids, val = ids[mask], val[mask]
+
+        if bins is None:
+            lab, lev = algos.factorize(val, sort=True)
+        else:
+            cat, bins = cut(val, bins, retbins=True)
+            # bins[:-1] for backward compat;
+            # otherwise cat.categories could be better
+            lab, lev, dropna = cat.codes, bins[:-1], False
+
+        sorter = np.lexsort((lab, ids))
+        ids, lab = ids[sorter], lab[sorter]
+
+        # group boundaries are where group ids change
+        idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]
+
+        # new values are where sorted labels change
+        inc = np.r_[True, lab[1:] != lab[:-1]]
+        inc[idx] = True  # group boundaries are also new values
+        out = np.diff(np.nonzero(np.r_[inc, True])[0])  # value counts
+
+        # num. of times each group should be repeated
+        rep = partial(np.repeat, repeats=np.add.reduceat(inc, idx))
+
+        # multi-index components
+        labels = list(map(rep, self.grouper.recons_labels)) + [lab[inc]]
+        levels = [ping.group_index for ping in self.grouper.groupings] + [lev]
+        names = self.grouper.names + [self.name]
+
+        if dropna:
+            mask = labels[-1] != -1
+            if mask.all():
+                dropna = False
+            else:
+                out, labels = out[mask], [label[mask] for label in labels]
+
+        if normalize:
+            out = out.astype('float')
+            acc = rep(np.diff(np.r_[idx, len(ids)]))
+            out /= acc[mask] if dropna else acc
+
+        if sort and bins is None:
+            cat = ids[inc][mask] if dropna else ids[inc]
+            sorter = np.lexsort((out if ascending else -out, cat))
+            out, labels[-1] = out[sorter], labels[-1][sorter]
+
+        if bins is None:
+            mi = MultiIndex(levels=levels, labels=labels, names=names,
+                            verify_integrity=False)
+
+            return Series(out, index=mi)
+
+        # for compat. with algos.value_counts we need to ensure every
+        # bin is present at every index level, null filled with zeros
+        diff = np.zeros(len(out), dtype='bool')
+        for lab in labels[:-1]:
+            diff |= np.r_[True, lab[1:] != lab[:-1]]
+
+        ncat, nbin = diff.sum(), len(levels[-1])
+
+        left = [np.repeat(np.arange(ncat), nbin),
+                np.tile(np.arange(nbin), ncat)]
+
+        right = [diff.cumsum() - 1, labels[-1]]
+
+        _, idx = _get_join_indexers(left, right, sort=False, how='left')
+        out = np.where(idx != -1, out[idx], 0)
+
+        if sort:
+            sorter = np.lexsort((out if ascending else -out, left[0]))
+            out, left[-1] = out[sorter], left[-1][sorter]
+
+        # build the multi-index w/ full levels
+        labels = list(map(lambda lab: np.repeat(lab[diff], nbin), labels[:-1]))
+        labels.append(left[-1])
+
+        mi = MultiIndex(levels=levels, labels=labels, names=names,
+                        verify_integrity=False)
+
+        return Series(out, index=mi)
+
     def _apply_to_column_groupbys(self, func):
         """ return a pass thru """
         return func(self)
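At its core, the non-binned path in the added method is a vectorised run-length count: group ids and factorized value labels are lexsorted by (id, label), group boundaries are marked where the id changes, a run starts wherever either the id or the label changes, and np.diff over the run-start positions yields the per-(group, value) counts. Below is a standalone sketch of that trick on made-up arrays (the data and variable names are illustrative, mirroring but not taken verbatim from the patch):

    import numpy as np

    # made-up, already-encoded inputs: group ids and factorized value labels
    ids = np.array([0, 0, 0, 1, 1, 1, 1])
    lab = np.array([2, 0, 0, 1, 1, 2, 1])

    # sort by group id first, then by value label within each group
    sorter = np.lexsort((lab, ids))
    ids, lab = ids[sorter], lab[sorter]

    # group boundaries are where the group id changes
    idx = np.r_[0, 1 + np.nonzero(ids[1:] != ids[:-1])[0]]

    # a new run starts where the sorted label changes ...
    inc = np.r_[True, lab[1:] != lab[:-1]]
    inc[idx] = True  # ... and at every group boundary

    # distances between consecutive run starts are the counts
    out = np.diff(np.nonzero(np.r_[inc, True])[0])

    print(out)  # [2 1 3 1] -> group 0: {0: 2, 2: 1}, group 1: {1: 3, 2: 1}

The same boundary mask does double duty in the patch: np.add.reduceat(inc, idx) sums the run starts within each group, giving the number of distinct values per group, which is what rep uses to repeat the group-key labels when assembling the MultiIndex.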