@@ -129,7 +129,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
129
129
series_index , name )
130
130
131
131
132
- def qcut (x , q , labels = None , retbins = False , precision = 3 ):
132
+ def qcut (x , q , labels = None , retbins = False , precision = 3 , duplicates = 'raise' ):
133
133
"""
134
134
Quantile-based discretization function. Discretize variable into
135
135
equal-sized buckets based on rank or based on sample quantiles. For example
@@ -151,6 +151,10 @@ def qcut(x, q, labels=None, retbins=False, precision=3):
151
151
as a scalar.
152
152
precision : int
153
153
The precision at which to store and display the bins labels
154
+ duplicates : {'raise', 'drop'}, default 'raise'
155
+ If bin edges are not unique, 'raise' raises a ValueError and 'drop' discards the duplicate edges.
156
+
157
+ .. versionadded:: 0.20.0
154
158
155
159
Returns
156
160
-------
@@ -187,22 +191,32 @@ def qcut(x, q, labels=None, retbins=False, precision=3):
187
191
bins = algos .quantile (x , quantiles )
188
192
fac , bins = _bins_to_cuts (x , bins , labels = labels ,
189
193
precision = precision , include_lowest = True ,
190
- dtype = dtype )
194
+ dtype = dtype , duplicates = duplicates )
191
195
192
196
return _postprocess_for_cut (fac , bins , retbins , x_is_series ,
193
197
series_index , name )
194
198
195
199
196
200
def _bins_to_cuts (x , bins , right = True , labels = None ,
197
201
precision = 3 , include_lowest = False ,
198
- dtype = None ):
202
+ dtype = None , duplicates = 'raise' ):
203
+
204
+ if duplicates not in ['raise' , 'drop' ]:
205
+ raise ValueError ("invalid value for 'duplicates' parameter, "
206
+ "valid options are: raise, drop" )
207
+
208
+ unique_bins = algos .unique (bins )
209
+ if len (unique_bins ) < len (bins ):
210
+ if duplicates == 'raise' :
211
+ raise ValueError ("Bin edges must be unique: {}. You "
212
+ "can drop duplicate edges by setting "
213
+ "'duplicates' param" .format (repr (bins )))
214
+ else :
215
+ bins = unique_bins
199
216
200
217
side = 'left' if right else 'right'
201
218
ids = bins .searchsorted (x , side = side )
202
219
203
- if len (algos .unique (bins )) < len (bins ):
204
- raise ValueError ('Bin edges must be unique: %s' % repr (bins ))
205
-
206
220
if include_lowest :
207
221
ids [x == bins [0 ]] = 1
208
222
0 commit comments