Skip to content

Commit 8f94009

Browse files
committed
ENH: clearer out of bounds error message in cut/qcut, close #1409
1 parent b1edf54 commit 8f94009

File tree

5 files changed, with 36 additions and 0 deletions

RELEASE.rst

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,7 @@ pandas 0.8.0
123123
- Use index name as xlabel/ylabel in plots (#1415)
124124
- Add ``convert_dtype`` option to Series.apply to be able to leave data as
125125
dtype=object (#1414)
126+
- Can specify all index level names in concat (#1419)
126127

127128
**API Changes**
128129

pandas/core/algorithms.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,6 +156,9 @@ def value_counts(values, sort=True, ascending=False):
156156
"""
157157
from pandas.core.series import Series
158158
from collections import defaultdict
159+
160+
values = np.asarray(values)
161+
159162
if com.is_integer_dtype(values.dtype):
160163
values = com._ensure_int64(values)
161164
keys, counts = lib.value_count_int64(values)

pandas/tests/test_factor.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
import numpy as np
88

9+
from pandas.core.api import value_counts
910
from pandas.core.factor import Factor
1011
from pandas.core.index import Index, Int64Index, MultiIndex
1112
from pandas.util.testing import assert_almost_equal
@@ -82,6 +83,18 @@ def test_comparisons(self):
8283
expected = np.repeat(False, len(self.factor))
8384
self.assert_(np.array_equal(result, expected))
8485

86+
def test_value_counts(self):
87+
from pandas.tools.tile import cut
88+
89+
arr = np.random.randn(4)
90+
factor = cut(arr, 4)
91+
92+
self.assert_(isinstance(factor, Factor))
93+
94+
result = value_counts(factor)
95+
expected = value_counts(np.asarray(factor))
96+
tm.assert_series_equal(result, expected)
97+
8598
if __name__ == '__main__':
8699
import nose
87100
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],

pandas/tools/tests/test_tile.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -109,6 +109,15 @@ def test_qcut_bounds(self):
109109
factor = qcut(arr, 10, labels=False)
110110
self.assert_(len(np.unique(factor)) == 10)
111111

112+
def test_cut_out_of_bounds(self):
113+
np.random.seed(12345)
114+
115+
arr = np.random.randn(100)
116+
self.assertRaises(ValueError, cut, arr, [-1, 0, 1])
117+
118+
arr = np.where(arr < -1, 0, arr)
119+
self.assertRaises(ValueError, cut, arr, [-1, 0, 1])
120+
112121
if __name__ == '__main__':
113122
nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
114123
exit=False)

pandas/tools/tile.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -146,6 +146,16 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
146146
side = 'left' if right else 'right'
147147
ids = bins.searchsorted(x, side=side)
148148

149+
na_mask = com.notnull(x)
150+
above = na_mask & (ids == len(bins))
151+
below = na_mask & (ids == 0)
152+
153+
if above.any():
154+
raise ValueError('Values fall past last bin: %s' % str(x[above]))
155+
156+
if below.any():
157+
raise ValueError('Values fall before first bin: %s' % str(x[below]))
158+
149159
mask = com.isnull(x)
150160
has_nas = mask.any()
151161

0 commit comments

Comments
 (0)