Skip to content

Commit 40ea1a5

Browse files
committed
Issue #15431
BUG: pd.qcut with q=1 and input with identical values
1 parent d842b70 commit 40ea1a5

File tree

3 files changed

+56
-0
lines changed

3 files changed

+56
-0
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,7 @@ Bug Fixes
500500
- Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`)
501501
- Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. (:issue:`15021`)
502502
- Bug in ``pd.cut()`` where requesting a single bin on an array of all zeros raised ``ValueError`` (:issue:`15428`)
503+
- Bug in ``pd.qcut()`` where requesting a single quantile on an array of identical values raised ``ValueError`` (:issue:`15431`)
503504
- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`)
504505

505506

pandas/tests/tools/test_tile.py

+48
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,36 @@ def test_qcut_duplicates_bin(self):
285285
# invalid
286286
self.assertRaises(ValueError, qcut, values, 3, duplicates='foo')
287287

288+
def test_single_quantile(self):
289+
# issue 15431
290+
expected = Series([0, 0])
291+
292+
s = Series([9., 9.])
293+
result = qcut(s, 1, labels=False)
294+
tm.assert_series_equal(result, expected)
295+
296+
s = Series([-9., -9.])
297+
result = qcut(s, 1, labels=False)
298+
tm.assert_series_equal(result, expected)
299+
300+
s = Series([0., 0.])
301+
result = qcut(s, 1, labels=False)
302+
tm.assert_series_equal(result, expected)
303+
304+
expected = Series([0])
305+
306+
s = Series([9])
307+
result = qcut(s, 1, labels=False)
308+
tm.assert_series_equal(result, expected)
309+
310+
s = Series([-9])
311+
result = qcut(s, 1, labels=False)
312+
tm.assert_series_equal(result, expected)
313+
314+
s = Series([0])
315+
result = qcut(s, 1, labels=False)
316+
tm.assert_series_equal(result, expected)
317+
288318
def test_single_bin(self):
289319
# issue 14652
290320
expected = Series([0, 0])
@@ -297,11 +327,29 @@ def test_single_bin(self):
297327
result = cut(s, 1, labels=False)
298328
tm.assert_series_equal(result, expected)
299329

330+
expected = Series([0])
331+
332+
s = Series([9])
333+
result = cut(s, 1, labels=False)
334+
tm.assert_series_equal(result, expected)
335+
336+
s = Series([-9])
337+
result = cut(s, 1, labels=False)
338+
tm.assert_series_equal(result, expected)
339+
300340
# issue 15428
341+
expected = Series([0, 0])
342+
301343
s = Series([0., 0.])
302344
result = cut(s, 1, labels=False)
303345
tm.assert_series_equal(result, expected)
304346

347+
expected = Series([0])
348+
349+
s = Series([0])
350+
result = cut(s, 1, labels=False)
351+
tm.assert_series_equal(result, expected)
352+
305353
def test_datetime_cut(self):
306354
# GH 14714
307355
# testing for time data to be present as series

pandas/tools/tile.py

+7
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,13 @@ def qcut(x, q, labels=None, retbins=False, precision=3, duplicates='raise'):
189189
else:
190190
quantiles = q
191191
bins = algos.quantile(x, quantiles)
192+
193+
# fix special case: q=1 and all identical values
194+
if q == 1 and len(bins) == 2 and bins[0] == bins[1]:
195+
bins = np.asarray(bins, np.float64)
196+
bins[0] -= .001 * abs(bins[0]) if bins[0] != 0 else .001
197+
bins[1] += .001 * abs(bins[1]) if bins[1] != 0 else .001
198+
192199
fac, bins = _bins_to_cuts(x, bins, labels=labels,
193200
precision=precision, include_lowest=True,
194201
dtype=dtype, duplicates=duplicates)

0 commit comments

Comments
 (0)