@@ -129,7 +129,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
129
129
series_index , name )
130
130
131
131
132
- def qcut (x , q , labels = None , retbins = False , precision = 3 ):
132
+ def qcut (x , q , labels = None , retbins = False , precision = 3 , duplicates = 'raise' ):
133
133
"""
134
134
Quantile-based discretization function. Discretize variable into
135
135
equal-sized buckets based on rank or based on sample quantiles. For example
@@ -151,6 +151,8 @@ def qcut(x, q, labels=None, retbins=False, precision=3):
151
151
as a scalar.
152
152
precision : int
153
153
The precision at which to store and display the bins labels
154
+ duplicates : {'raise', 'drop'}, optional
155
+ If bin edges are not unique, raise ValueError or drop the duplicate edges.
154
156
155
157
Returns
156
158
-------
@@ -187,15 +189,26 @@ def qcut(x, q, labels=None, retbins=False, precision=3):
187
189
bins = algos .quantile (x , quantiles )
188
190
fac , bins = _bins_to_cuts (x , bins , labels = labels ,
189
191
precision = precision , include_lowest = True ,
190
- dtype = dtype )
192
+ dtype = dtype , duplicates = duplicates )
191
193
192
194
return _postprocess_for_cut (fac , bins , retbins , x_is_series ,
193
195
series_index , name )
194
196
195
197
196
198
def _bins_to_cuts (x , bins , right = True , labels = None ,
197
199
precision = 3 , include_lowest = False ,
198
- dtype = None ):
200
+ dtype = None , duplicates = 'raise' ):
201
+
202
+ if duplicates not in ['raise' , 'drop' ]:
203
+ raise ValueError ("invalid value for 'duplicates' parameter, "
204
+ + "valid options are: raise, drop" )
205
+
206
+ if duplicates == 'raise' :
207
+ raise ValueError ('Bin edges must be unique: %s' % repr (bins ) +
208
+ ' You can drop duplicate edges ' +
209
+ 'by setting \' duplicates\' param' )
210
+ else :
211
+ bins = algos .unique (bins )
199
212
200
213
side = 'left' if right else 'right'
201
214
ids = bins .searchsorted (x , side = side )
0 commit comments