Skip to content

Commit bf9ac58

Browse files
WillAyd authored and proost committed
Removed generate_bins_generic (pandas-dev#29192)
1 parent de6051e commit bf9ac58

File tree

2 files changed

+38
-104
lines changed

2 files changed

+38
-104
lines changed

pandas/core/groupby/ops.py

-53
Original file line number | Diff line number | Diff line change
@@ -53,59 +53,6 @@
5353
)
5454

5555

56-
def generate_bins_generic(values, binner, closed):
57-
"""
58-
Generate bin edge offsets and bin labels for one array using another array
59-
which has bin edge values. Both arrays must be sorted.
60-
61-
Parameters
62-
----------
63-
values : array of values
64-
binner : a comparable array of values representing bins into which to bin
65-
the first array. Note, 'values' end-points must fall within 'binner'
66-
end-points.
67-
closed : which end of bin is closed; left (default), right
68-
69-
Returns
70-
-------
71-
bins : array of offsets (into 'values' argument) of bins.
72-
Zero and last edge are excluded in result, so for instance the first
73-
bin is values[0:bin[0]] and the last is values[bin[-1]:]
74-
"""
75-
lenidx = len(values)
76-
lenbin = len(binner)
77-
78-
if lenidx <= 0 or lenbin <= 0:
79-
raise ValueError("Invalid length for values or for binner")
80-
81-
# check binner fits data
82-
if values[0] < binner[0]:
83-
raise ValueError("Values falls before first bin")
84-
85-
if values[lenidx - 1] > binner[lenbin - 1]:
86-
raise ValueError("Values falls after last bin")
87-
88-
bins = np.empty(lenbin - 1, dtype=np.int64)
89-
90-
j = 0 # index into values
91-
bc = 0 # bin count
92-
93-
# linear scan, presume nothing about values/binner except that it fits ok
94-
for i in range(0, lenbin - 1):
95-
r_bin = binner[i + 1]
96-
97-
# count values in current bin, advance to next bin
98-
while j < lenidx and (
99-
values[j] < r_bin or (closed == "right" and values[j] == r_bin)
100-
):
101-
j += 1
102-
103-
bins[bc] = j
104-
bc += 1
105-
106-
return bins
107-
108-
10956
class BaseGrouper:
11057
"""
11158
This is an internal Grouper class, which actually holds

pandas/tests/groupby/test_bin_groupby.py

+38 -51
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,7 @@
66
from pandas.core.dtypes.common import ensure_int64
77

88
from pandas import Index, Series, isna
9-
from pandas.core.groupby.ops import generate_bins_generic
109
import pandas.util.testing as tm
11-
from pandas.util.testing import assert_almost_equal
1210

1311

1412
def test_series_grouper():
@@ -21,10 +19,10 @@ def test_series_grouper():
2119
result, counts = grouper.get_result()
2220

2321
expected = np.array([obj[3:6].mean(), obj[6:].mean()])
24-
assert_almost_equal(result, expected)
22+
tm.assert_almost_equal(result, expected)
2523

2624
exp_counts = np.array([3, 4], dtype=np.int64)
27-
assert_almost_equal(counts, exp_counts)
25+
tm.assert_almost_equal(counts, exp_counts)
2826

2927

3028
def test_series_bin_grouper():
@@ -37,48 +35,37 @@ def test_series_bin_grouper():
3735
result, counts = grouper.get_result()
3836

3937
expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()])
40-
assert_almost_equal(result, expected)
38+
tm.assert_almost_equal(result, expected)
4139

4240
exp_counts = np.array([3, 3, 4], dtype=np.int64)
43-
assert_almost_equal(counts, exp_counts)
44-
45-
46-
class TestBinGroupers:
47-
def setup_method(self, method):
48-
self.obj = np.random.randn(10, 1)
49-
self.labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2], dtype=np.int64)
50-
self.bins = np.array([3, 6], dtype=np.int64)
51-
52-
def test_generate_bins(self):
53-
values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
54-
binner = np.array([0, 3, 6, 9], dtype=np.int64)
55-
56-
for func in [lib.generate_bins_dt64, generate_bins_generic]:
57-
bins = func(values, binner, closed="left")
58-
assert (bins == np.array([2, 5, 6])).all()
59-
60-
bins = func(values, binner, closed="right")
61-
assert (bins == np.array([3, 6, 6])).all()
62-
63-
for func in [lib.generate_bins_dt64, generate_bins_generic]:
64-
values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
65-
binner = np.array([0, 3, 6], dtype=np.int64)
66-
67-
bins = func(values, binner, closed="right")
68-
assert (bins == np.array([3, 6])).all()
69-
70-
msg = "Invalid length for values or for binner"
71-
with pytest.raises(ValueError, match=msg):
72-
generate_bins_generic(values, [], "right")
73-
with pytest.raises(ValueError, match=msg):
74-
generate_bins_generic(values[:0], binner, "right")
75-
76-
msg = "Values falls before first bin"
77-
with pytest.raises(ValueError, match=msg):
78-
generate_bins_generic(values, [4], "right")
79-
msg = "Values falls after last bin"
80-
with pytest.raises(ValueError, match=msg):
81-
generate_bins_generic(values, [-3, -1], "right")
41+
tm.assert_almost_equal(counts, exp_counts)
42+
43+
44+
@pytest.mark.parametrize(
45+
"binner,closed,expected",
46+
[
47+
(
48+
np.array([0, 3, 6, 9], dtype=np.int64),
49+
"left",
50+
np.array([2, 5, 6], dtype=np.int64),
51+
),
52+
(
53+
np.array([0, 3, 6, 9], dtype=np.int64),
54+
"right",
55+
np.array([3, 6, 6], dtype=np.int64),
56+
),
57+
(np.array([0, 3, 6], dtype=np.int64), "left", np.array([2, 5], dtype=np.int64)),
58+
(
59+
np.array([0, 3, 6], dtype=np.int64),
60+
"right",
61+
np.array([3, 6], dtype=np.int64),
62+
),
63+
],
64+
)
65+
def test_generate_bins(binner, closed, expected):
66+
values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
67+
result = lib.generate_bins_dt64(values, binner, closed=closed)
68+
tm.assert_numpy_array_equal(result, expected)
8269

8370

8471
def test_group_ohlc():
@@ -100,13 +87,13 @@ def _ohlc(group):
10087

10188
expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])])
10289

103-
assert_almost_equal(out, expected)
90+
tm.assert_almost_equal(out, expected)
10491
tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64))
10592

10693
obj[:6] = np.nan
10794
func(out, counts, obj[:, None], labels)
10895
expected[0] = np.nan
109-
assert_almost_equal(out, expected)
96+
tm.assert_almost_equal(out, expected)
11097

11198
_check("float32")
11299
_check("float64")
@@ -121,29 +108,29 @@ def test_int_index(self):
121108
arr = np.random.randn(100, 4)
122109
result = libreduction.compute_reduction(arr, np.sum, labels=Index(np.arange(4)))
123110
expected = arr.sum(0)
124-
assert_almost_equal(result, expected)
111+
tm.assert_almost_equal(result, expected)
125112

126113
result = libreduction.compute_reduction(
127114
arr, np.sum, axis=1, labels=Index(np.arange(100))
128115
)
129116
expected = arr.sum(1)
130-
assert_almost_equal(result, expected)
117+
tm.assert_almost_equal(result, expected)
131118

132119
dummy = Series(0.0, index=np.arange(100))
133120
result = libreduction.compute_reduction(
134121
arr, np.sum, dummy=dummy, labels=Index(np.arange(4))
135122
)
136123
expected = arr.sum(0)
137-
assert_almost_equal(result, expected)
124+
tm.assert_almost_equal(result, expected)
138125

139126
dummy = Series(0.0, index=np.arange(4))
140127
result = libreduction.compute_reduction(
141128
arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100))
142129
)
143130
expected = arr.sum(1)
144-
assert_almost_equal(result, expected)
131+
tm.assert_almost_equal(result, expected)
145132

146133
result = libreduction.compute_reduction(
147134
arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100))
148135
)
149-
assert_almost_equal(result, expected)
136+
tm.assert_almost_equal(result, expected)

0 commit comments

Comments
 (0)