Skip to content

Commit f609640

Browse files
Joe Jevnikjreback
Joe Jevnik
authored andcommitted
BUG: Fix move_into_mutable_buffer for python 3.6.
In python 3.6, the CALL_FUNCTION handling was updated. One change is that when calling a C function from a python function python now counts the reference owned by the argument tuple. This means that move was always seeing objects with two references instead of the expected one. Python 3.6 also removed a copy in the argument tuple when *unpacking functions. This means that if a user does: tuple = (create_string(),) move_into_mutable_buffer(*tuple) where create_string() creates a string object with one reference then we will fail to raise a BadMove even though the user could later retrieve that string with tuple[0]. There is no way to detect this case so this patch adds a warning to the docstring advising against star unpacking. xref: #14679 I played around with removing the extra reference that was added in 3.6 but it looks like playing with borrowed refs everywhere will be a bit tricky. This change should clear things up for 3.6 while continuing to work for older versions. In 3.6 you __could__ get a shared mutable string from this but you need to try pretty hard for it. Author: Joe Jevnik <[email protected]> Closes #14695 from llllllllll/move-3.6-compat and squashes the following commits: d1e8b1b [Joe Jevnik] BUG: Fix move_into_mutable_buffer for python 3.6.
1 parent f26b049 commit f609640

File tree

2 files changed

+89
-91
lines changed

2 files changed

+89
-91
lines changed

pandas/tests/test_util.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import sys
66
import unittest
77
from uuid import uuid4
8-
from pandas.util._move import move_into_mutable_buffer, BadMove
8+
from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf
99
from pandas.util.decorators import deprecate_kwarg
1010
from pandas.util.validators import (validate_args, validate_kwargs,
1111
validate_args_and_kwargs)
@@ -299,6 +299,14 @@ def test_validation(self):
299299

300300

301301
class TestMove(tm.TestCase):
302+
def test_cannot_create_instance_of_stolenbuffer(self):
303+
"""Stolen buffers need to be created through the smart constructor
304+
``move_into_mutable_buffer`` which has a bunch of checks in it.
305+
"""
306+
msg = "cannot create 'pandas.util._move.stolenbuf' instances"
307+
with tm.assertRaisesRegexp(TypeError, msg):
308+
stolenbuf()
309+
302310
def test_more_than_one_ref(self):
303311
"""Test case for when we try to use ``move_into_mutable_buffer`` when
304312
the object being moved has other references.

pandas/util/move.c

+80-90
Original file line numberDiff line numberDiff line change
@@ -88,54 +88,37 @@ PyBufferProcs stolenbuf_as_buffer = {
8888

8989
#endif /* COMPILING_IN_PY2 */
9090

91-
static PyObject *
92-
stolenbuf_new(PyObject *self, PyObject *args, PyObject *kwargs)
93-
{
94-
stolenbufobject *ret;
95-
PyObject *bytes_rvalue;
96-
97-
if (kwargs && PyDict_Size(kwargs)) {
98-
PyErr_SetString(PyExc_TypeError,
99-
"stolenbuf does not accept keyword arguments");
100-
return NULL;
101-
}
102-
103-
if (PyTuple_GET_SIZE(args) != 1) {
104-
PyErr_SetString(PyExc_TypeError,
105-
"stolenbuf requires exactly 1 positional argument");
106-
return NULL;
107-
108-
}
109-
110-
/* pull out the single, positional argument */
111-
bytes_rvalue = PyTuple_GET_ITEM(args, 0);
112-
113-
if (!PyString_CheckExact(bytes_rvalue)) {
114-
PyErr_SetString(PyExc_TypeError,
115-
"stolenbuf can only steal from bytes objects");
116-
return NULL;
117-
}
118-
119-
if (Py_REFCNT(bytes_rvalue) != 1 || PyString_CHECK_INTERNED(bytes_rvalue)) {
120-
/* there is a reference other than the caller's stack or the string is
121-
interned */
122-
PyErr_SetObject(badmove, bytes_rvalue);
123-
return NULL;
124-
}
125-
126-
if (!(ret = PyObject_New(stolenbufobject, &stolenbuf_type))) {
127-
return NULL;
128-
}
91+
PyDoc_STRVAR(stolenbuf_doc,
92+
"A buffer that is wrapping a stolen bytes object's buffer.");
12993

130-
/* store the original bytes object in a field that is not
131-
exposed to python */
132-
Py_INCREF(bytes_rvalue);
133-
ret->invalid_bytes = bytes_rvalue;
134-
return (PyObject*) ret;
135-
}
94+
PyTypeObject stolenbuf_type = {
95+
PyVarObject_HEAD_INIT(NULL, 0)
96+
"pandas.util._move.stolenbuf", /* tp_name */
97+
sizeof(stolenbufobject), /* tp_basicsize */
98+
0, /* tp_itemsize */
99+
(destructor) stolenbuf_dealloc, /* tp_dealloc */
100+
0, /* tp_print */
101+
0, /* tp_getattr */
102+
0, /* tp_setattr */
103+
0, /* tp_reserved */
104+
0, /* tp_repr */
105+
0, /* tp_as_number */
106+
0, /* tp_as_sequence */
107+
0, /* tp_as_mapping */
108+
0, /* tp_hash */
109+
0, /* tp_call */
110+
0, /* tp_str */
111+
0, /* tp_getattro */
112+
0, /* tp_setattro */
113+
&stolenbuf_as_buffer, /* tp_as_buffer */
114+
Py_TPFLAGS_DEFAULT |
115+
Py_TPFLAGS_HAVE_NEWBUFFER |
116+
Py_TPFLAGS_HAVE_GETCHARBUFFER, /* tp_flags */
117+
stolenbuf_doc, /* tp_doc */
118+
};
136119

137120
PyDoc_STRVAR(
138-
stolenbuf_doc,
121+
move_into_mutable_buffer_doc,
139122
"Moves a bytes object that is about to be destroyed into a mutable buffer\n"
140123
"without copying the data.\n"
141124
"\n"
@@ -159,49 +142,55 @@ PyDoc_STRVAR(
159142
"\n"
160143
"Notes\n"
161144
"-----\n"
162-
"If you want to use this function you are probably wrong.\n");
145+
"If you want to use this function you are probably wrong.\n"
146+
"\n"
147+
"Warning: Do not call this function through *unpacking. This can\n"
148+
"potentially trick the reference checks which may allow you to get a\n"
149+
"mutable reference to a shared string!\n"
150+
"\n");
151+
152+
/* This is implemented as a standalone function instead of the ``tp_new`` of
153+
``stolenbuf`` because we need to create a function using the METH_O flag
154+
to support Python 3.6. In python 3.6, PyCFunction calls from python code now
155+
count the reference owned by the argument tuple. This would cause the object
156+
to have 2 references if used with a direct call like: ``stolenbuf(a)``;
157+
however, if called through *unpacking like ``stolenbuf(*(a,))`` it would
158+
only have the one reference (the tuple). */
159+
static PyObject*
160+
move_into_mutable_buffer(PyObject *self, PyObject *bytes_rvalue)
161+
{
162+
stolenbufobject *ret;
163163

164-
PyTypeObject stolenbuf_type = {
165-
PyVarObject_HEAD_INIT(NULL, 0)
166-
"pandas.util._move.stolenbuf", /* tp_name */
167-
sizeof(stolenbufobject), /* tp_basicsize */
168-
0, /* tp_itemsize */
169-
(destructor) stolenbuf_dealloc, /* tp_dealloc */
170-
0, /* tp_print */
171-
0, /* tp_getattr */
172-
0, /* tp_setattr */
173-
0, /* tp_reserved */
174-
0, /* tp_repr */
175-
0, /* tp_as_number */
176-
0, /* tp_as_sequence */
177-
0, /* tp_as_mapping */
178-
0, /* tp_hash */
179-
0, /* tp_call */
180-
0, /* tp_str */
181-
0, /* tp_getattro */
182-
0, /* tp_setattro */
183-
&stolenbuf_as_buffer, /* tp_as_buffer */
184-
Py_TPFLAGS_DEFAULT |
185-
Py_TPFLAGS_HAVE_NEWBUFFER |
186-
Py_TPFLAGS_HAVE_GETCHARBUFFER, /* tp_flags */
187-
stolenbuf_doc, /* tp_doc */
188-
0, /* tp_traverse */
189-
0, /* tp_clear */
190-
0, /* tp_richcompare */
191-
0, /* tp_weaklistoffset */
192-
0, /* tp_iter */
193-
0, /* tp_iternext */
194-
0, /* tp_methods */
195-
0, /* tp_members */
196-
0, /* tp_getset */
197-
0, /* tp_base */
198-
0, /* tp_dict */
199-
0, /* tp_descr_get */
200-
0, /* tp_descr_set */
201-
0, /* tp_dictoffset */
202-
0, /* tp_init */
203-
0, /* tp_alloc */
204-
(newfunc) stolenbuf_new, /* tp_new */
164+
if (!PyString_CheckExact(bytes_rvalue)) {
165+
PyErr_SetString(PyExc_TypeError,
166+
"stolenbuf can only steal from bytes objects");
167+
return NULL;
168+
}
169+
170+
if (Py_REFCNT(bytes_rvalue) != 1 || PyString_CHECK_INTERNED(bytes_rvalue)) {
171+
/* there is a reference other than the caller's stack or the string is
172+
interned */
173+
PyErr_SetObject(badmove, bytes_rvalue);
174+
return NULL;
175+
}
176+
177+
if (!(ret = PyObject_New(stolenbufobject, &stolenbuf_type))) {
178+
return NULL;
179+
}
180+
181+
/* store the original bytes object in a field that is not
182+
exposed to python */
183+
Py_INCREF(bytes_rvalue);
184+
ret->invalid_bytes = bytes_rvalue;
185+
return (PyObject*) ret;
186+
}
187+
188+
PyMethodDef methods[] = {
189+
{"move_into_mutable_buffer",
190+
(PyCFunction) move_into_mutable_buffer,
191+
METH_O,
192+
move_into_mutable_buffer_doc},
193+
{NULL},
205194
};
206195

207196
#define MODULE_NAME "pandas.util._move"
@@ -212,6 +201,7 @@ PyModuleDef _move_module = {
212201
MODULE_NAME,
213202
NULL,
214203
-1,
204+
methods,
215205
};
216206
#endif /* !COMPILING_IN_PY2 */
217207

@@ -223,7 +213,7 @@ PyDoc_STRVAR(
223213
"Parameters\n"
224214
"----------\n"
225215
"data : any\n"
226-
" The data which was passed to ``_move_into_mutable_buffer``.\n"
216+
" The data which was passed to ``move_into_mutable_buffer``.\n"
227217
"\n"
228218
"See Also\n"
229219
"--------\n"
@@ -254,14 +244,14 @@ init_move(void)
254244
#if !COMPILING_IN_PY2
255245
if (!(m = PyModule_Create(&_move_module)))
256246
#else
257-
if (!(m = Py_InitModule(MODULE_NAME, NULL)))
247+
if (!(m = Py_InitModule(MODULE_NAME, methods)))
258248
#endif /* !COMPILING_IN_PY2 */
259249
{
260250
return ERROR_RETURN;
261251
}
262252

263253
if (PyModule_AddObject(m,
264-
"move_into_mutable_buffer",
254+
"stolenbuf",
265255
(PyObject*) &stolenbuf_type)) {
266256
Py_DECREF(m);
267257
return ERROR_RETURN;

0 commit comments

Comments
 (0)