Skip to content

BUG: Fix move_into_mutable_buffer for python 3.6. #14695

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion pandas/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import sys
import unittest
from uuid import uuid4
from pandas.util._move import move_into_mutable_buffer, BadMove
from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf
from pandas.util.decorators import deprecate_kwarg
from pandas.util.validators import (validate_args, validate_kwargs,
validate_args_and_kwargs)
Expand Down Expand Up @@ -299,6 +299,14 @@ def test_validation(self):


class TestMove(tm.TestCase):
def test_cannot_create_instance_of_stolenbuffer(self):
"""Stolen buffers need to be created through the smart constructor
``move_into_mutable_buffer`` which has a bunch of checks in it.
"""
msg = "cannot create 'pandas.util._move.stolenbuf' instances"
with tm.assertRaisesRegexp(TypeError, msg):
stolenbuf()

def test_more_than_one_ref(self):
"""Test case for when we try to use ``move_into_mutable_buffer`` when
the object being moved has other references.
Expand Down
170 changes: 80 additions & 90 deletions pandas/util/move.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,54 +88,37 @@ PyBufferProcs stolenbuf_as_buffer = {

#endif /* COMPILING_IN_PY2 */

static PyObject *
stolenbuf_new(PyObject *self, PyObject *args, PyObject *kwargs)
{
stolenbufobject *ret;
PyObject *bytes_rvalue;

if (kwargs && PyDict_Size(kwargs)) {
PyErr_SetString(PyExc_TypeError,
"stolenbuf does not accept keyword arguments");
return NULL;
}

if (PyTuple_GET_SIZE(args) != 1) {
PyErr_SetString(PyExc_TypeError,
"stolenbuf requires exactly 1 positional argument");
return NULL;

}

/* pull out the single, positional argument */
bytes_rvalue = PyTuple_GET_ITEM(args, 0);

if (!PyString_CheckExact(bytes_rvalue)) {
PyErr_SetString(PyExc_TypeError,
"stolenbuf can only steal from bytes objects");
return NULL;
}

if (Py_REFCNT(bytes_rvalue) != 1 || PyString_CHECK_INTERNED(bytes_rvalue)) {
/* there is a reference other than the caller's stack or the string is
interned */
PyErr_SetObject(badmove, bytes_rvalue);
return NULL;
}

if (!(ret = PyObject_New(stolenbufobject, &stolenbuf_type))) {
return NULL;
}
PyDoc_STRVAR(stolenbuf_doc,
"A buffer that is wrapping a stolen bytes object's buffer.");

/* store the original bytes object in a field that is not
exposed to python */
Py_INCREF(bytes_rvalue);
ret->invalid_bytes = bytes_rvalue;
return (PyObject*) ret;
}
PyTypeObject stolenbuf_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"pandas.util._move.stolenbuf", /* tp_name */
sizeof(stolenbufobject), /* tp_basicsize */
0, /* tp_itemsize */
(destructor) stolenbuf_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
&stolenbuf_as_buffer, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT |
Py_TPFLAGS_HAVE_NEWBUFFER |
Py_TPFLAGS_HAVE_GETCHARBUFFER, /* tp_flags */
stolenbuf_doc, /* tp_doc */
};

PyDoc_STRVAR(
stolenbuf_doc,
move_into_mutable_buffer_doc,
"Moves a bytes object that is about to be destroyed into a mutable buffer\n"
"without copying the data.\n"
"\n"
Expand All @@ -159,49 +142,55 @@ PyDoc_STRVAR(
"\n"
"Notes\n"
"-----\n"
"If you want to use this function you are probably wrong.\n");
"If you want to use this function you are probably wrong.\n"
"\n"
"Warning: Do not call this function through *unpacking. This can\n"
"potentially trick the reference checks which may allow you to get a\n"
"mutable reference to a shared string!\n"
"\n");

/* This is implemented as a standalone function instead of the ``tp_new`` of
``stolenbuf`` because we need to create a function using the METH_O flag
to support Python 3.6. In python 3.6, PyCFunction calls from python code now
count the reference owned by the argument tuple. This would cause the object
to have 2 references if used with a direct call like: ``stolenbuf(a)``;
however, if called through *unpacking like ``stolenbuf(*(a,))`` it would
only have the one reference (the tuple). */
static PyObject*
move_into_mutable_buffer(PyObject *self, PyObject *bytes_rvalue)
{
stolenbufobject *ret;

PyTypeObject stolenbuf_type = {
PyVarObject_HEAD_INIT(NULL, 0)
"pandas.util._move.stolenbuf", /* tp_name */
sizeof(stolenbufobject), /* tp_basicsize */
0, /* tp_itemsize */
(destructor) stolenbuf_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
&stolenbuf_as_buffer, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT |
Py_TPFLAGS_HAVE_NEWBUFFER |
Py_TPFLAGS_HAVE_GETCHARBUFFER, /* tp_flags */
stolenbuf_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
(newfunc) stolenbuf_new, /* tp_new */
if (!PyString_CheckExact(bytes_rvalue)) {
PyErr_SetString(PyExc_TypeError,
"stolenbuf can only steal from bytes objects");
return NULL;
}

if (Py_REFCNT(bytes_rvalue) != 1 || PyString_CHECK_INTERNED(bytes_rvalue)) {
/* there is a reference other than the caller's stack or the string is
interned */
PyErr_SetObject(badmove, bytes_rvalue);
return NULL;
}

if (!(ret = PyObject_New(stolenbufobject, &stolenbuf_type))) {
return NULL;
}

/* store the original bytes object in a field that is not
exposed to python */
Py_INCREF(bytes_rvalue);
ret->invalid_bytes = bytes_rvalue;
return (PyObject*) ret;
}

PyMethodDef methods[] = {
{"move_into_mutable_buffer",
(PyCFunction) move_into_mutable_buffer,
METH_O,
move_into_mutable_buffer_doc},
{NULL},
};

#define MODULE_NAME "pandas.util._move"
Expand All @@ -212,6 +201,7 @@ PyModuleDef _move_module = {
MODULE_NAME,
NULL,
-1,
methods,
};
#endif /* !COMPILING_IN_PY2 */

Expand All @@ -223,7 +213,7 @@ PyDoc_STRVAR(
"Parameters\n"
"----------\n"
"data : any\n"
" The data which was passed to ``_move_into_mutable_buffer``.\n"
" The data which was passed to ``move_into_mutable_buffer``.\n"
"\n"
"See Also\n"
"--------\n"
Expand Down Expand Up @@ -254,14 +244,14 @@ init_move(void)
#if !COMPILING_IN_PY2
if (!(m = PyModule_Create(&_move_module)))
#else
if (!(m = Py_InitModule(MODULE_NAME, NULL)))
if (!(m = Py_InitModule(MODULE_NAME, methods)))
#endif /* !COMPILING_IN_PY2 */
{
return ERROR_RETURN;
}

if (PyModule_AddObject(m,
"move_into_mutable_buffer",
"stolenbuf",
(PyObject*) &stolenbuf_type)) {
Py_DECREF(m);
return ERROR_RETURN;
Expand Down