Skip to content

Removed generate_bins_generic #29192

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Oct 29, 2019
Merged
53 changes: 0 additions & 53 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,59 +53,6 @@
)


def generate_bins_generic(values, binner, closed):
"""
Generate bin edge offsets and bin labels for one array using another array
which has bin edge values. Both arrays must be sorted.

Parameters
----------
values : array of values
binner : a comparable array of values representing bins into which to bin
the first array. Note, 'values' end-points must fall within 'binner'
end-points.
closed : which end of bin is closed; left (default), right

Returns
-------
bins : array of offsets (into 'values' argument) of bins.
Zero and last edge are excluded in result, so for instance the first
bin is values[0:bin[0]] and the last is values[bin[-1]:]
"""
lenidx = len(values)
lenbin = len(binner)

if lenidx <= 0 or lenbin <= 0:
raise ValueError("Invalid length for values or for binner")

# check binner fits data
if values[0] < binner[0]:
raise ValueError("Values falls before first bin")

if values[lenidx - 1] > binner[lenbin - 1]:
raise ValueError("Values falls after last bin")

bins = np.empty(lenbin - 1, dtype=np.int64)

j = 0 # index into values
bc = 0 # bin count

# linear scan, presume nothing about values/binner except that it fits ok
for i in range(0, lenbin - 1):
r_bin = binner[i + 1]

# count values in current bin, advance to next bin
while j < lenidx and (
values[j] < r_bin or (closed == "right" and values[j] == r_bin)
):
j += 1

bins[bc] = j
bc += 1

return bins


class BaseGrouper:
"""
This is an internal Grouper class, which actually holds
Expand Down
89 changes: 38 additions & 51 deletions pandas/tests/groupby/test_bin_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
from pandas.core.dtypes.common import ensure_int64

from pandas import Index, Series, isna
from pandas.core.groupby.ops import generate_bins_generic
import pandas.util.testing as tm
from pandas.util.testing import assert_almost_equal


def test_series_grouper():
Expand All @@ -21,10 +19,10 @@ def test_series_grouper():
result, counts = grouper.get_result()

expected = np.array([obj[3:6].mean(), obj[6:].mean()])
assert_almost_equal(result, expected)
tm.assert_almost_equal(result, expected)

exp_counts = np.array([3, 4], dtype=np.int64)
assert_almost_equal(counts, exp_counts)
tm.assert_almost_equal(counts, exp_counts)


def test_series_bin_grouper():
Expand All @@ -37,48 +35,37 @@ def test_series_bin_grouper():
result, counts = grouper.get_result()

expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()])
assert_almost_equal(result, expected)
tm.assert_almost_equal(result, expected)

exp_counts = np.array([3, 3, 4], dtype=np.int64)
assert_almost_equal(counts, exp_counts)


class TestBinGroupers:
    """Tests comparing the compiled and pure-Python bin generators."""

    def setup_method(self, method):
        # Fixtures rebuilt before every test method.
        self.obj = np.random.randn(10, 1)
        self.labels = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 2], dtype=np.int64)
        self.bins = np.array([3, 6], dtype=np.int64)

    def test_generate_bins(self):
        """Both implementations agree on offsets; the generic one validates input."""
        implementations = [lib.generate_bins_dt64, generate_bins_generic]

        values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
        binner = np.array([0, 3, 6, 9], dtype=np.int64)

        # Last bin edge lies beyond the data.
        for make_bins in implementations:
            left_closed = make_bins(values, binner, closed="left")
            assert (left_closed == np.array([2, 5, 6])).all()

            right_closed = make_bins(values, binner, closed="right")
            assert (right_closed == np.array([3, 6, 6])).all()

        # Last bin edge coincides with the largest value.
        for make_bins in implementations:
            values = np.array([1, 2, 3, 4, 5, 6], dtype=np.int64)
            binner = np.array([0, 3, 6], dtype=np.int64)

            right_closed = make_bins(values, binner, closed="right")
            assert (right_closed == np.array([3, 6])).all()

        # (expected message, values, binner) for each rejected input
        rejected = [
            ("Invalid length for values or for binner", values, []),
            ("Invalid length for values or for binner", values[:0], binner),
            ("Values falls before first bin", values, [4]),
            ("Values falls after last bin", values, [-3, -1]),
        ]
        for msg, bad_values, bad_binner in rejected:
            with pytest.raises(ValueError, match=msg):
                generate_bins_generic(bad_values, bad_binner, "right")
tm.assert_almost_equal(counts, exp_counts)


@pytest.mark.parametrize(
    "binner,closed,expected",
    [
        pytest.param(
            np.array([0, 3, 6, 9], dtype=np.int64),
            "left",
            np.array([2, 5, 6], dtype=np.int64),
        ),
        pytest.param(
            np.array([0, 3, 6, 9], dtype=np.int64),
            "right",
            np.array([3, 6, 6], dtype=np.int64),
        ),
        pytest.param(
            np.array([0, 3, 6], dtype=np.int64),
            "left",
            np.array([2, 5], dtype=np.int64),
        ),
        pytest.param(
            np.array([0, 3, 6], dtype=np.int64),
            "right",
            np.array([3, 6], dtype=np.int64),
        ),
    ],
)
def test_generate_bins(binner, closed, expected):
    """Check the offsets lib.generate_bins_dt64 returns for each case."""
    # The same six sorted values are binned in every case.
    values = np.arange(1, 7, dtype=np.int64)
    bins = lib.generate_bins_dt64(values, binner, closed=closed)
    tm.assert_numpy_array_equal(bins, expected)


def test_group_ohlc():
Expand All @@ -100,13 +87,13 @@ def _ohlc(group):

expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])])

assert_almost_equal(out, expected)
tm.assert_almost_equal(out, expected)
tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64))

obj[:6] = np.nan
func(out, counts, obj[:, None], labels)
expected[0] = np.nan
assert_almost_equal(out, expected)
tm.assert_almost_equal(out, expected)

_check("float32")
_check("float64")
Expand All @@ -121,29 +108,29 @@ def test_int_index(self):
arr = np.random.randn(100, 4)
result = libreduction.compute_reduction(arr, np.sum, labels=Index(np.arange(4)))
expected = arr.sum(0)
assert_almost_equal(result, expected)
tm.assert_almost_equal(result, expected)

result = libreduction.compute_reduction(
arr, np.sum, axis=1, labels=Index(np.arange(100))
)
expected = arr.sum(1)
assert_almost_equal(result, expected)
tm.assert_almost_equal(result, expected)

dummy = Series(0.0, index=np.arange(100))
result = libreduction.compute_reduction(
arr, np.sum, dummy=dummy, labels=Index(np.arange(4))
)
expected = arr.sum(0)
assert_almost_equal(result, expected)
tm.assert_almost_equal(result, expected)

dummy = Series(0.0, index=np.arange(4))
result = libreduction.compute_reduction(
arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100))
)
expected = arr.sum(1)
assert_almost_equal(result, expected)
tm.assert_almost_equal(result, expected)

result = libreduction.compute_reduction(
arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100))
)
assert_almost_equal(result, expected)
tm.assert_almost_equal(result, expected)