Skip to content

Commit b7d92dc

Browse files
committed
Added 'allow' duplicates option to _bins_to_cuts
1 parent f56a27f commit b7d92dc

File tree

2 files changed

+30
-11
lines changed

2 files changed

+30
-11
lines changed

pandas/tests/tools/test_tile.py

+25 -3
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44
from pandas.compat import zip
55

6-
from pandas import Series, Index
6+
from pandas import Series, Index, Categorical
77
import pandas.util.testing as tm
88
from pandas.util.testing import assertRaisesRegexp
99
import pandas.core.common as com
@@ -239,7 +239,6 @@ def test_qcut_binning_issues(self):
239239
self.assertTrue(ep <= sn)
240240

241241
def test_cut_return_categorical(self):
242-
from pandas import Categorical
243242
s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
244243
res = cut(s, 3)
245244
exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2],
@@ -249,7 +248,6 @@ def test_cut_return_categorical(self):
249248
tm.assert_series_equal(res, exp)
250249

251250
def test_qcut_return_categorical(self):
252-
from pandas import Categorical
253251
s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
254252
res = qcut(s, [0, 0.333, 0.666, 1])
255253
exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2],
@@ -292,28 +290,52 @@ def test_single_quantile(self):
292290
s = Series([9., 9.])
293291
result = qcut(s, 1, labels=False)
294292
tm.assert_series_equal(result, expected)
293+
result = qcut(s, 1)
294+
exp_lab = Series(Categorical.from_codes([0, 0], ["[9, 9]"],
295+
ordered=True))
296+
tm.assert_series_equal(result, exp_lab)
295297

296298
s = Series([-9., -9.])
297299
result = qcut(s, 1, labels=False)
298300
tm.assert_series_equal(result, expected)
301+
result = qcut(s, 1)
302+
exp_lab = Series(Categorical.from_codes([0, 0], ["[-9, -9]"],
303+
ordered=True))
304+
tm.assert_series_equal(result, exp_lab)
299305

300306
s = Series([0., 0.])
301307
result = qcut(s, 1, labels=False)
302308
tm.assert_series_equal(result, expected)
309+
result = qcut(s, 1)
310+
exp_lab = Series(Categorical.from_codes([0, 0], ["[0, 0]"],
311+
ordered=True))
312+
tm.assert_series_equal(result, exp_lab)
303313

304314
expected = Series([0])
305315

306316
s = Series([9])
307317
result = qcut(s, 1, labels=False)
308318
tm.assert_series_equal(result, expected)
319+
result = qcut(s, 1)
320+
exp_lab = Series(Categorical.from_codes([0], ["[9, 9]"],
321+
ordered=True))
322+
tm.assert_series_equal(result, exp_lab)
309323

310324
s = Series([-9])
311325
result = qcut(s, 1, labels=False)
312326
tm.assert_series_equal(result, expected)
327+
result = qcut(s, 1)
328+
exp_lab = Series(Categorical.from_codes([0], ["[-9, -9]"],
329+
ordered=True))
330+
tm.assert_series_equal(result, exp_lab)
313331

314332
s = Series([0])
315333
result = qcut(s, 1, labels=False)
316334
tm.assert_series_equal(result, expected)
335+
result = qcut(s, 1)
336+
exp_lab = Series(Categorical.from_codes([0], ["[0, 0]"],
337+
ordered=True))
338+
tm.assert_series_equal(result, exp_lab)
317339

318340
def test_single_bin(self):
319341
# issue 14652

pandas/tools/tile.py

+5 -8
Original file line number | Diff line number | Diff line change
@@ -186,16 +186,13 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
186186

187187
if is_integer(q):
188188
quantiles = np.linspace(0, 1, q + 1)
189+
190+
if q == 1:
191+
duplicates = 'allow'
189192
else:
190193
quantiles = q
191194
bins = algos.quantile(x, quantiles)
192195

193-
# fix special case: q=1 and all identical values
194-
if q == 1 and len(bins) == 2 and bins[0] == bins[1]:
195-
bins = np.asarray(bins, np.float64)
196-
bins[0] -= .001 * abs(bins[0]) if bins[0] != 0 else .001
197-
bins[1] += .001 * abs(bins[1]) if bins[1] != 0 else .001
198-
199196
fac, bins = _bins_to_cuts(x, bins, labels=labels,
200197
precision=precision, include_lowest=True,
201198
dtype=dtype, duplicates=duplicates)
@@ -208,7 +205,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None,
208205
precision=3, include_lowest=False,
209206
dtype=None, duplicates='raise'):
210207

211-
if duplicates not in ['raise', 'drop']:
208+
if duplicates not in ['raise', 'drop', 'allow']:
212209
raise ValueError("invalid value for 'duplicates' parameter, "
213210
"valid options are: raise, drop")
214211

@@ -218,7 +215,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None,
218215
raise ValueError("Bin edges must be unique: {}.\nYou "
219216
"can drop duplicate edges by setting "
220217
"the 'duplicates' kwarg".format(repr(bins)))
221-
else:
218+
elif duplicates == 'drop':
222219
bins = unique_bins
223220

224221
side = 'left' if right else 'right'

0 commit comments

Comments
 (0)