Skip to content

Commit d32acaa

Browse files
luca-s authored and jreback committed
BUG: pd.cut with bins=1 and input all 0s
The special case of running pd.cut() with bins=1 and an input containing all 0s raises a ValueError. closes pandas-dev#15428 closes pandas-dev#15431 Author: Luca Scarabello <[email protected]> Author: Luca <[email protected]> Closes pandas-dev#15437 from luca-s/issue_15428 and squashes the following commits: 1248987 [Luca] rebased on master def84ba [Luca] Yet another implementation attempt 692503a [Luca Scarabello] Improved solution: using same approach as pd.cut b7d92dc [Luca] Added 'allow' duplicates option to _bins_to_cuts f56a27f [Luca Scarabello] Issue pandas-dev#15431 55806cf [Luca Scarabello] BUG: pd.cut with bins=1 and input all 0s
1 parent 11c9479 commit d32acaa

File tree

3 files changed

+83
-8
lines changed

3 files changed

+83
-8
lines changed

doc/source/whatsnew/v0.20.0.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -698,8 +698,8 @@ Bug Fixes
698698
- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`)
699699
- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`)
700700
- Bug in ``Rolling.quantile`` function that caused a segmentation fault when called with a quantile value outside of the range [0, 1] (:issue:`15463`)
701-
702-
701+
- Bug in ``pd.cut()`` with a single bin on an all 0s array (:issue:`15428`)
702+
- Bug in ``pd.qcut()`` with a single quantile and an array with identical values (:issue:`15431`)
703703
- Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`)
704704

705705

pandas/tests/tools/test_tile.py

+78-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import numpy as np
44
from pandas.compat import zip
55

6-
from pandas import Series, Index
6+
from pandas import Series, Index, Categorical
77
import pandas.util.testing as tm
88
from pandas.util.testing import assertRaisesRegexp
99
import pandas.core.common as com
@@ -239,7 +239,6 @@ def test_qcut_binning_issues(self):
239239
self.assertTrue(ep <= sn)
240240

241241
def test_cut_return_categorical(self):
242-
from pandas import Categorical
243242
s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
244243
res = cut(s, 3)
245244
exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2],
@@ -249,7 +248,6 @@ def test_cut_return_categorical(self):
249248
tm.assert_series_equal(res, exp)
250249

251250
def test_qcut_return_categorical(self):
252-
from pandas import Categorical
253251
s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
254252
res = qcut(s, [0, 0.333, 0.666, 1])
255253
exp = Series(Categorical.from_codes([0, 0, 0, 1, 1, 1, 2, 2, 2],
@@ -285,6 +283,60 @@ def test_qcut_duplicates_bin(self):
285283
# invalid
286284
self.assertRaises(ValueError, qcut, values, 3, duplicates='foo')
287285

286+
def test_single_quantile(self):
287+
# issue 15431
288+
expected = Series([0, 0])
289+
290+
s = Series([9., 9.])
291+
result = qcut(s, 1, labels=False)
292+
tm.assert_series_equal(result, expected)
293+
result = qcut(s, 1)
294+
exp_lab = Series(Categorical.from_codes([0, 0], ["[9, 9]"],
295+
ordered=True))
296+
tm.assert_series_equal(result, exp_lab)
297+
298+
s = Series([-9., -9.])
299+
result = qcut(s, 1, labels=False)
300+
tm.assert_series_equal(result, expected)
301+
result = qcut(s, 1)
302+
exp_lab = Series(Categorical.from_codes([0, 0], ["[-9, -9]"],
303+
ordered=True))
304+
tm.assert_series_equal(result, exp_lab)
305+
306+
s = Series([0., 0.])
307+
result = qcut(s, 1, labels=False)
308+
tm.assert_series_equal(result, expected)
309+
result = qcut(s, 1)
310+
exp_lab = Series(Categorical.from_codes([0, 0], ["[0, 0]"],
311+
ordered=True))
312+
tm.assert_series_equal(result, exp_lab)
313+
314+
expected = Series([0])
315+
316+
s = Series([9])
317+
result = qcut(s, 1, labels=False)
318+
tm.assert_series_equal(result, expected)
319+
result = qcut(s, 1)
320+
exp_lab = Series(Categorical.from_codes([0], ["[9, 9]"],
321+
ordered=True))
322+
tm.assert_series_equal(result, exp_lab)
323+
324+
s = Series([-9])
325+
result = qcut(s, 1, labels=False)
326+
tm.assert_series_equal(result, expected)
327+
result = qcut(s, 1)
328+
exp_lab = Series(Categorical.from_codes([0], ["[-9, -9]"],
329+
ordered=True))
330+
tm.assert_series_equal(result, exp_lab)
331+
332+
s = Series([0])
333+
result = qcut(s, 1, labels=False)
334+
tm.assert_series_equal(result, expected)
335+
result = qcut(s, 1)
336+
exp_lab = Series(Categorical.from_codes([0], ["[0, 0]"],
337+
ordered=True))
338+
tm.assert_series_equal(result, exp_lab)
339+
288340
def test_single_bin(self):
289341
# issue 14652
290342
expected = Series([0, 0])
@@ -297,6 +349,29 @@ def test_single_bin(self):
297349
result = cut(s, 1, labels=False)
298350
tm.assert_series_equal(result, expected)
299351

352+
expected = Series([0])
353+
354+
s = Series([9])
355+
result = cut(s, 1, labels=False)
356+
tm.assert_series_equal(result, expected)
357+
358+
s = Series([-9])
359+
result = cut(s, 1, labels=False)
360+
tm.assert_series_equal(result, expected)
361+
362+
# issue 15428
363+
expected = Series([0, 0])
364+
365+
s = Series([0., 0.])
366+
result = cut(s, 1, labels=False)
367+
tm.assert_series_equal(result, expected)
368+
369+
expected = Series([0])
370+
371+
s = Series([0])
372+
result = cut(s, 1, labels=False)
373+
tm.assert_series_equal(result, expected)
374+
300375
def test_datetime_cut(self):
301376
# GH 14714
302377
# testing for time data to be present as series

pandas/tools/tile.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
104104
mn, mx = [mi + 0.0 for mi in rng]
105105

106106
if mn == mx: # adjust end points before binning
107-
mn -= .001 * abs(mn)
108-
mx += .001 * abs(mx)
107+
mn -= .001 * abs(mn) if mn != 0 else .001
108+
mx += .001 * abs(mx) if mx != 0 else .001
109109
bins = np.linspace(mn, mx, bins + 1, endpoint=True)
110110
else: # adjust end points after binning
111111
bins = np.linspace(mn, mx, bins + 1, endpoint=True)
@@ -206,7 +206,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None,
206206
"valid options are: raise, drop")
207207

208208
unique_bins = algos.unique(bins)
209-
if len(unique_bins) < len(bins):
209+
if len(unique_bins) < len(bins) and len(bins) != 2:
210210
if duplicates == 'raise':
211211
raise ValueError("Bin edges must be unique: {}.\nYou "
212212
"can drop duplicate edges by setting "

0 commit comments

Comments
 (0)