
Commit b80dcbc

BUG: groupby and agg on read-only array gives ValueError: buffer source array is read-only (#36061)
1 parent 497ede8 commit b80dcbc
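
A minimal reproduction of the regression this commit fixes, distilled from the test added below (it flips the writeable flag through the internal block manager exactly as the test does; on the affected pandas 1.1.0/1.1.1 builds the ``agg`` call raised ``ValueError: buffer source array is read-only``):

import pandas as pd

df = pd.DataFrame(
    {
        "sepal_length": [5.1, 4.9, 4.7, 4.6, 5.0],
        "species": ["setosa", "setosa", "setosa", "setosa", "setosa"],
    }
)
# Mark the underlying float64 block as read-only; read-only buffers also
# arise naturally, e.g. from arrays loaded with np.load(..., mmap_mode="r").
df._mgr.blocks[0].values.flags.writeable = False

# Raised ValueError on the affected versions; succeeds once this fix is applied.
result = df.groupby(["species"]).agg({"sepal_length": "mean"})
print(result)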

File tree

3 files changed (+60, -15 lines changed)

doc/source/whatsnew/v1.1.2.rst (+1, -1)

@@ -18,7 +18,7 @@ Fixed regressions
 - Fix regression in updating a column inplace (e.g. using ``df['col'].fillna(.., inplace=True)``) (:issue:`35731`)
 - Performance regression for :meth:`RangeIndex.format` (:issue:`35712`)
 - Regression in :meth:`DataFrame.replace` where a ``TypeError`` would be raised when attempting to replace elements of type :class:`Interval` (:issue:`35931`)
--
+- Fixed regression in :meth:`DataFrameGroupBy.agg` where a ``ValueError: buffer source array is read-only`` would be raised when the underlying array is read-only (:issue:`36014`)


 .. ---------------------------------------------------------------------------

pandas/_libs/groupby.pyx (+18, -14)

@@ -229,7 +229,7 @@ def group_cumprod_float64(float64_t[:, :] out,
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_cumsum(numeric[:, :] out,
-                 numeric[:, :] values,
+                 ndarray[numeric, ndim=2] values,
                  const int64_t[:] labels,
                  int ngroups,
                  is_datetimelike,
@@ -472,7 +472,7 @@ ctypedef fused complexfloating_t:
 @cython.boundscheck(False)
 def _group_add(complexfloating_t[:, :] out,
                int64_t[:] counts,
-               complexfloating_t[:, :] values,
+               ndarray[complexfloating_t, ndim=2] values,
               const int64_t[:] labels,
               Py_ssize_t min_count=0):
     """
@@ -483,8 +483,9 @@ def _group_add(complexfloating_t[:, :] out,
         complexfloating_t val, count
         complexfloating_t[:, :] sumx
         int64_t[:, :] nobs
+        Py_ssize_t len_values = len(values), len_labels = len(labels)

-    if len(values) != len(labels):
+    if len_values != len_labels:
         raise ValueError("len(index) != len(labels)")

     nobs = np.zeros((<object>out).shape, dtype=np.int64)
@@ -530,7 +531,7 @@ group_add_complex128 = _group_add['double complex']
 @cython.boundscheck(False)
 def _group_prod(floating[:, :] out,
                 int64_t[:] counts,
-                floating[:, :] values,
+                ndarray[floating, ndim=2] values,
                 const int64_t[:] labels,
                 Py_ssize_t min_count=0):
     """
@@ -541,8 +542,9 @@ def _group_prod(floating[:, :] out,
         floating val, count
         floating[:, :] prodx
         int64_t[:, :] nobs
+        Py_ssize_t len_values = len(values), len_labels = len(labels)

-    if not len(values) == len(labels):
+    if len_values != len_labels:
         raise ValueError("len(index) != len(labels)")

     nobs = np.zeros((<object>out).shape, dtype=np.int64)
@@ -582,7 +584,7 @@ group_prod_float64 = _group_prod['double']
 @cython.cdivision(True)
 def _group_var(floating[:, :] out,
                int64_t[:] counts,
-               floating[:, :] values,
+               ndarray[floating, ndim=2] values,
               const int64_t[:] labels,
               Py_ssize_t min_count=-1,
               int64_t ddof=1):
@@ -591,10 +593,11 @@ def _group_var(floating[:, :] out,
         floating val, ct, oldmean
         floating[:, :] mean
         int64_t[:, :] nobs
+        Py_ssize_t len_values = len(values), len_labels = len(labels)

     assert min_count == -1, "'min_count' only used in add and prod"

-    if not len(values) == len(labels):
+    if len_values != len_labels:
         raise ValueError("len(index) != len(labels)")

     nobs = np.zeros((<object>out).shape, dtype=np.int64)
@@ -639,18 +642,19 @@ group_var_float64 = _group_var['double']
 @cython.boundscheck(False)
 def _group_mean(floating[:, :] out,
                 int64_t[:] counts,
-                floating[:, :] values,
+                ndarray[floating, ndim=2] values,
                 const int64_t[:] labels,
                 Py_ssize_t min_count=-1):
     cdef:
         Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
         floating val, count
         floating[:, :] sumx
         int64_t[:, :] nobs
+        Py_ssize_t len_values = len(values), len_labels = len(labels)

     assert min_count == -1, "'min_count' only used in add and prod"

-    if not len(values) == len(labels):
+    if len_values != len_labels:
         raise ValueError("len(index) != len(labels)")

     nobs = np.zeros((<object>out).shape, dtype=np.int64)
@@ -689,7 +693,7 @@ group_mean_float64 = _group_mean['double']
 @cython.boundscheck(False)
 def _group_ohlc(floating[:, :] out,
                 int64_t[:] counts,
-                floating[:, :] values,
+                ndarray[floating, ndim=2] values,
                 const int64_t[:] labels,
                 Py_ssize_t min_count=-1):
     """
@@ -740,7 +744,7 @@ group_ohlc_float64 = _group_ohlc['double']
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_quantile(ndarray[float64_t] out,
-                   numeric[:] values,
+                   ndarray[numeric, ndim=1] values,
                    ndarray[int64_t] labels,
                    ndarray[uint8_t] mask,
                    float64_t q,
@@ -1072,7 +1076,7 @@ def group_nth(rank_t[:, :] out,
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_rank(float64_t[:, :] out,
-               rank_t[:, :] values,
+               ndarray[rank_t, ndim=2] values,
               const int64_t[:] labels,
               int ngroups,
               bint is_datetimelike, object ties_method="average",
@@ -1424,7 +1428,7 @@ def group_min(groupby_t[:, :] out,
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_cummin(groupby_t[:, :] out,
-                 groupby_t[:, :] values,
+                 ndarray[groupby_t, ndim=2] values,
                  const int64_t[:] labels,
                  int ngroups,
                  bint is_datetimelike):
@@ -1484,7 +1488,7 @@ def group_cummin(groupby_t[:, :] out,
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def group_cummax(groupby_t[:, :] out,
-                 groupby_t[:, :] values,
+                 ndarray[groupby_t, ndim=2] values,
                  const int64_t[:] labels,
                  int ngroups,
                  bint is_datetimelike):
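
Every hunk above follows the same pattern: the ``values`` argument changes from a writable typed memoryview (e.g. ``floating[:, :] values``) to a buffer-typed ndarray (e.g. ``ndarray[floating, ndim=2] values``), and the length checks are hoisted into ``Py_ssize_t`` locals. A non-const typed memoryview makes Cython request a writable buffer when the function is called, even though these kernels only read from ``values``, so a read-only array fails buffer acquisition with ``ValueError: buffer source array is read-only``; the ndarray declaration does not require write access. (Declaring the memoryview ``const`` would be another way to accept read-only input, but that is not what this commit does.) A small NumPy-only sketch of the read-only semantics involved, not part of the commit:

import numpy as np

values = np.arange(10, dtype=np.float64).reshape(5, 2)
# Read-only arrays show up in practice from, e.g., np.load(..., mmap_mode="r").
values.flags.writeable = False

# Reading is fine: the groupby kernels above only ever read from `values`.
print(values.sum(axis=0))

# Any request for write access fails; a non-const typed memoryview argument
# makes exactly such a request at call time.
try:
    values[0, 0] = 1.0
except ValueError as exc:
    print(exc)  # assignment destination is read-only

# A read-only view can still be taken and consumed without error.
mv = memoryview(values)
print(mv.readonly)  # True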

pandas/tests/groupby/aggregate/test_cython.py (+41, -0)

@@ -236,3 +236,44 @@ def test_cython_with_timestamp_and_nat(op, data):

     result = df.groupby("a").aggregate(op)
     tm.assert_frame_equal(expected, result)
+
+
+@pytest.mark.parametrize(
+    "agg",
+    [
+        "min",
+        "max",
+        "count",
+        "sum",
+        "prod",
+        "var",
+        "mean",
+        "median",
+        "ohlc",
+        "cumprod",
+        "cumsum",
+        "shift",
+        "any",
+        "all",
+        "quantile",
+        "first",
+        "last",
+        "rank",
+        "cummin",
+        "cummax",
+    ],
+)
+def test_read_only_buffer_source_agg(agg):
+    # https://github.com/pandas-dev/pandas/issues/36014
+    df = DataFrame(
+        {
+            "sepal_length": [5.1, 4.9, 4.7, 4.6, 5.0],
+            "species": ["setosa", "setosa", "setosa", "setosa", "setosa"],
+        }
+    )
+    df._mgr.blocks[0].values.flags.writeable = False
+
+    result = df.groupby(["species"]).agg({"sepal_length": agg})
+    expected = df.copy().groupby(["species"]).agg({"sepal_length": agg})
+
+    tm.assert_equal(result, expected)
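
For a quick end-to-end check outside pytest, the same coverage can be run as a standalone loop over the parametrized aggregation names; ``check_read_only_aggs`` below is a hypothetical helper (not part of the commit) and pokes at the internal ``df._mgr`` exactly as the test does:

import pandas as pd
import pandas._testing as tm

AGGS = [
    "min", "max", "count", "sum", "prod", "var", "mean", "median", "ohlc",
    "cumprod", "cumsum", "shift", "any", "all", "quantile", "first", "last",
    "rank", "cummin", "cummax",
]


def check_read_only_aggs():
    """Return the aggregations that still fail on a read-only block."""
    failures = []
    for agg in AGGS:
        df = pd.DataFrame(
            {
                "sepal_length": [5.1, 4.9, 4.7, 4.6, 5.0],
                "species": ["setosa"] * 5,
            }
        )
        # Same internal toggle the test uses to make the float block read-only.
        df._mgr.blocks[0].values.flags.writeable = False
        try:
            result = df.groupby(["species"]).agg({"sepal_length": agg})
            expected = df.copy().groupby(["species"]).agg({"sepal_length": agg})
            tm.assert_equal(result, expected)
        except ValueError as exc:
            failures.append((agg, str(exc)))
    return failures


print(check_read_only_aggs())  # expected: [] on a build that includes this fix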
