|
3 | 3 | """
|
4 | 4 |
|
5 | 5 | from pandas.core.api import DataFrame, Series
|
| 6 | +import pandas.core.algorithms as algos |
6 | 7 | import pandas.core.common as com
|
7 | 8 | import pandas.core.nanops as nanops
|
8 | 9 |
|
@@ -92,13 +93,56 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3):
|
92 | 93 | if (np.diff(bins) < 0).any():
|
93 | 94 | raise ValueError('bins must increase monotonically.')
|
94 | 95 |
|
| 96 | + return _bins_to_cuts(x, bins, right=right, labels=labels, |
| 97 | + retbins=retbins, precision=precision) |
| 98 | + |
| 99 | + |
| 100 | + |
def qcut(x, q=4, labels=None, retbins=False, precision=3):
    """
    Quantile-based discretization function. Discretize variable into
    equal-sized buckets based on sample quantiles. For example, 1000 values
    with q=10 are assigned to decile buckets, each value being marked with
    the bucket it falls into.

    Parameters
    ----------
    x : ndarray or Series
    q : integer or array of quantiles
        Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately
        array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles
    labels : array or boolean, default None
        Labels to use for bin edges, or False to return integer bin labels
    retbins : bool, optional
        Whether to return the bins or not. Useful here because the bin edges
        are computed from the data instead of being supplied by the caller.
    precision : int, default 3
        Passed through unchanged to _bins_to_cuts (presumably the number of
        significant digits used when formatting bin labels -- confirm there).

    Returns
    -------
    The result of _bins_to_cuts applied to `x` and the computed quantile
    bin edges; see that function for the exact shape of the output with
    and without `retbins`.

    Notes
    -----
    The bin edges are the sample quantiles of `x` as computed by
    algos.quantile. An integer `q` is expanded to the q + 1 evenly spaced
    quantiles between 0 and 1; an array `q` is used as given.
    """
    if com.is_integer(q):
        # q buckets need q + 1 edges, from the 0th to the 100th percentile.
        quantiles = np.linspace(0, 1, q + 1)
    else:
        # Explicit quantiles: use them as-is, coerced to an ndarray so that
        # algos.quantile receives the same kind of input as the integer case.
        quantiles = np.asarray(q)

    bins = algos.quantile(x, quantiles)
    return _bins_to_cuts(x, bins, labels=labels, retbins=retbins,
                         precision=precision)
| 136 | + |
| 137 | + |
| 138 | +def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False, |
| 139 | + precision=3): |
95 | 140 | side = 'left' if right else 'right'
|
96 | 141 | ids = bins.searchsorted(x, side=side)
|
97 | 142 |
|
98 | 143 | mask = com.isnull(x)
|
99 | 144 | has_nas = mask.any()
|
100 | 145 |
|
101 |
| - |
102 | 146 | if labels is not False:
|
103 | 147 | if labels is None:
|
104 | 148 | labels = bins
|
@@ -132,35 +176,6 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3):
|
132 | 176 | return labels, bins
|
133 | 177 |
|
134 | 178 |
|
135 |
| -def qcut(x, n, ties_method='average'): |
136 |
| - """ |
137 |
| - Quantile-based discretization function. Discretize variable into |
138 |
| - equal-sized buckets based on rank. For example 1000 values for 10 quantiles |
139 |
| - would produce 1000 integers from 0 to 9 indicating the |
140 |
| -
|
141 |
| - Parameters |
142 |
| - ---------- |
143 |
| - x : ndarray or Series |
144 |
| - n : integer |
145 |
| - Number of quantiles. 10 for deciles, 4 for quartiles, etc. |
146 |
| - ties_method : {'average', 'min', 'max', 'first'}, default 'average' |
147 |
| - average: average rank of group |
148 |
| - min: lowest rank in group |
149 |
| - max: highest rank in group |
150 |
| - first: ranks assigned in order they appear in the array |
151 |
| -
|
152 |
| - Returns |
153 |
| - ------- |
154 |
| -
|
155 |
| - Notes |
156 |
| - ----- |
157 |
| -
|
158 |
| - Examples |
159 |
| - -------- |
160 |
| - """ |
161 |
| - pass |
162 |
| - |
163 |
| - |
164 | 179 | def _format_label(x, precision=3):
|
165 | 180 | fmt_str = '%%.%dg' % precision
|
166 | 181 | if com.is_float(x):
|
|
0 commit comments