Skip to content

Backport PR #36061 on branch 1.1.x (BUG: groupby and agg on read-only array gives ValueError: buffer source array is read-only) #36117

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Fixed regressions
- Fix regression in updating a column inplace (e.g. using ``df['col'].fillna(.., inplace=True)``) (:issue:`35731`)
- Performance regression for :meth:`RangeIndex.format` (:issue:`35712`)
- Regression in :meth:`DataFrame.replace` where a ``TypeError`` would be raised when attempting to replace elements of type :class:`Interval` (:issue:`35931`)
-
- Fixed regression in :meth:`DataFrameGroupBy.agg` where a ``ValueError: buffer source array is read-only`` would be raised when the underlying array is read-only (:issue:`36014`)


.. ---------------------------------------------------------------------------
Expand Down
32 changes: 18 additions & 14 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def group_cumprod_float64(float64_t[:, :] out,
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumsum(numeric[:, :] out,
numeric[:, :] values,
ndarray[numeric, ndim=2] values,
const int64_t[:] labels,
int ngroups,
is_datetimelike,
Expand Down Expand Up @@ -472,7 +472,7 @@ ctypedef fused complexfloating_t:
@cython.boundscheck(False)
def _group_add(complexfloating_t[:, :] out,
int64_t[:] counts,
complexfloating_t[:, :] values,
ndarray[complexfloating_t, ndim=2] values,
const int64_t[:] labels,
Py_ssize_t min_count=0):
"""
Expand All @@ -483,8 +483,9 @@ def _group_add(complexfloating_t[:, :] out,
complexfloating_t val, count
complexfloating_t[:, :] sumx
int64_t[:, :] nobs
Py_ssize_t len_values = len(values), len_labels = len(labels)

if len(values) != len(labels):
if len_values != len_labels:
raise ValueError("len(index) != len(labels)")

nobs = np.zeros((<object>out).shape, dtype=np.int64)
Expand Down Expand Up @@ -530,7 +531,7 @@ group_add_complex128 = _group_add['double complex']
@cython.boundscheck(False)
def _group_prod(floating[:, :] out,
int64_t[:] counts,
floating[:, :] values,
ndarray[floating, ndim=2] values,
const int64_t[:] labels,
Py_ssize_t min_count=0):
"""
Expand All @@ -541,8 +542,9 @@ def _group_prod(floating[:, :] out,
floating val, count
floating[:, :] prodx
int64_t[:, :] nobs
Py_ssize_t len_values = len(values), len_labels = len(labels)

if not len(values) == len(labels):
if len_values != len_labels:
raise ValueError("len(index) != len(labels)")

nobs = np.zeros((<object>out).shape, dtype=np.int64)
Expand Down Expand Up @@ -582,7 +584,7 @@ group_prod_float64 = _group_prod['double']
@cython.cdivision(True)
def _group_var(floating[:, :] out,
int64_t[:] counts,
floating[:, :] values,
ndarray[floating, ndim=2] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1,
int64_t ddof=1):
Expand All @@ -591,10 +593,11 @@ def _group_var(floating[:, :] out,
floating val, ct, oldmean
floating[:, :] mean
int64_t[:, :] nobs
Py_ssize_t len_values = len(values), len_labels = len(labels)

assert min_count == -1, "'min_count' only used in add and prod"

if not len(values) == len(labels):
if len_values != len_labels:
raise ValueError("len(index) != len(labels)")

nobs = np.zeros((<object>out).shape, dtype=np.int64)
Expand Down Expand Up @@ -639,18 +642,19 @@ group_var_float64 = _group_var['double']
@cython.boundscheck(False)
def _group_mean(floating[:, :] out,
int64_t[:] counts,
floating[:, :] values,
ndarray[floating, ndim=2] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
cdef:
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
floating val, count
floating[:, :] sumx
int64_t[:, :] nobs
Py_ssize_t len_values = len(values), len_labels = len(labels)

assert min_count == -1, "'min_count' only used in add and prod"

if not len(values) == len(labels):
if len_values != len_labels:
raise ValueError("len(index) != len(labels)")

nobs = np.zeros((<object>out).shape, dtype=np.int64)
Expand Down Expand Up @@ -689,7 +693,7 @@ group_mean_float64 = _group_mean['double']
@cython.boundscheck(False)
def _group_ohlc(floating[:, :] out,
int64_t[:] counts,
floating[:, :] values,
ndarray[floating, ndim=2] values,
const int64_t[:] labels,
Py_ssize_t min_count=-1):
"""
Expand Down Expand Up @@ -740,7 +744,7 @@ group_ohlc_float64 = _group_ohlc['double']
@cython.boundscheck(False)
@cython.wraparound(False)
def group_quantile(ndarray[float64_t] out,
numeric[:] values,
ndarray[numeric, ndim=1] values,
ndarray[int64_t] labels,
ndarray[uint8_t] mask,
float64_t q,
Expand Down Expand Up @@ -1072,7 +1076,7 @@ def group_nth(rank_t[:, :] out,
@cython.boundscheck(False)
@cython.wraparound(False)
def group_rank(float64_t[:, :] out,
rank_t[:, :] values,
ndarray[rank_t, ndim=2] values,
const int64_t[:] labels,
int ngroups,
bint is_datetimelike, object ties_method="average",
Expand Down Expand Up @@ -1424,7 +1428,7 @@ def group_min(groupby_t[:, :] out,
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummin(groupby_t[:, :] out,
groupby_t[:, :] values,
ndarray[groupby_t, ndim=2] values,
const int64_t[:] labels,
int ngroups,
bint is_datetimelike):
Expand Down Expand Up @@ -1484,7 +1488,7 @@ def group_cummin(groupby_t[:, :] out,
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cummax(groupby_t[:, :] out,
groupby_t[:, :] values,
ndarray[groupby_t, ndim=2] values,
const int64_t[:] labels,
int ngroups,
bint is_datetimelike):
Expand Down
41 changes: 41 additions & 0 deletions pandas/tests/groupby/aggregate/test_cython.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,3 +236,44 @@ def test_cython_with_timestamp_and_nat(op, data):

result = df.groupby("a").aggregate(op)
tm.assert_frame_equal(expected, result)


@pytest.mark.parametrize(
    "agg",
    [
        "min",
        "max",
        "count",
        "sum",
        "prod",
        "var",
        "mean",
        "median",
        "ohlc",
        "cumprod",
        "cumsum",
        "shift",
        "any",
        "all",
        "quantile",
        "first",
        "last",
        "rank",
        "cummin",
        "cummax",
    ],
)
def test_read_only_buffer_source_agg(agg):
    """
    Grouped aggregation over a frame whose first block has its ``writeable``
    flag cleared must give the same result as over a copy of that frame.

    Regression test for https://github.com/pandas-dev/pandas/issues/36014
    """

    def _grouped_agg(frame):
        # Same groupby/agg call for both the read-only frame and its copy.
        return frame.groupby(["species"]).agg({"sepal_length": agg})

    iris = DataFrame(
        {
            "sepal_length": [5.1, 4.9, 4.7, 4.6, 5.0],
            "species": ["setosa", "setosa", "setosa", "setosa", "setosa"],
        }
    )

    # Clear the writeable flag on the array held by the first internal block.
    first_block_values = iris._mgr.blocks[0].values
    first_block_values.flags.writeable = False

    result = _grouped_agg(iris)
    expected = _grouped_agg(iris.copy())

    tm.assert_equal(result, expected)