diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index ee33e24c7f6c4..cb12048676d26 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -5,7 +5,7 @@ import sys import unittest from uuid import uuid4 -from pandas.util._move import move_into_mutable_buffer, BadMove +from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf from pandas.util.decorators import deprecate_kwarg from pandas.util.validators import (validate_args, validate_kwargs, validate_args_and_kwargs) @@ -299,6 +299,14 @@ def test_validation(self): class TestMove(tm.TestCase): + def test_cannot_create_instance_of_stolenbuffer(self): + """Stolen buffers need to be created through the smart constructor + ``move_into_mutable_buffer`` which has a bunch of checks in it. + """ + msg = "cannot create 'pandas.util._move.stolenbuf' instances" + with tm.assertRaisesRegexp(TypeError, msg): + stolenbuf() + def test_more_than_one_ref(self): """Test case for when we try to use ``move_into_mutable_buffer`` when the object being moved has other references. diff --git a/pandas/util/move.c b/pandas/util/move.c index fb918c302b100..9a8af5bbfbdf6 100644 --- a/pandas/util/move.c +++ b/pandas/util/move.c @@ -88,54 +88,37 @@ PyBufferProcs stolenbuf_as_buffer = { #endif /* COMPILING_IN_PY2 */ -static PyObject * -stolenbuf_new(PyObject *self, PyObject *args, PyObject *kwargs) -{ - stolenbufobject *ret; - PyObject *bytes_rvalue; - - if (kwargs && PyDict_Size(kwargs)) { - PyErr_SetString(PyExc_TypeError, - "stolenbuf does not accept keyword arguments"); - return NULL; - } - - if (PyTuple_GET_SIZE(args) != 1) { - PyErr_SetString(PyExc_TypeError, - "stolenbuf requires exactly 1 positional argument"); - return NULL; - - } - - /* pull out the single, positional argument */ - bytes_rvalue = PyTuple_GET_ITEM(args, 0); - - if (!PyString_CheckExact(bytes_rvalue)) { - PyErr_SetString(PyExc_TypeError, - "stolenbuf can only steal from bytes objects"); - return NULL; - } - - if (Py_REFCNT(bytes_rvalue) != 1 || PyString_CHECK_INTERNED(bytes_rvalue)) { - /* there is a reference other than the caller's stack or the string is - interned */ - PyErr_SetObject(badmove, bytes_rvalue); - return NULL; - } - - if (!(ret = PyObject_New(stolenbufobject, &stolenbuf_type))) { - return NULL; - } +PyDoc_STRVAR(stolenbuf_doc, + "A buffer that is wrapping a stolen bytes object's buffer."); - /* store the original bytes object in a field that is not - exposed to python */ - Py_INCREF(bytes_rvalue); - ret->invalid_bytes = bytes_rvalue; - return (PyObject*) ret; -} +PyTypeObject stolenbuf_type = { + PyVarObject_HEAD_INIT(NULL, 0) + "pandas.util._move.stolenbuf", /* tp_name */ + sizeof(stolenbufobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor) stolenbuf_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + &stolenbuf_as_buffer, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | + Py_TPFLAGS_HAVE_NEWBUFFER | + Py_TPFLAGS_HAVE_GETCHARBUFFER, /* tp_flags */ + stolenbuf_doc, /* tp_doc */ +}; PyDoc_STRVAR( - stolenbuf_doc, + move_into_mutable_buffer_doc, "Moves a bytes object that is about to be destroyed into a mutable buffer\n" "without copying the data.\n" "\n" @@ -159,49 +142,55 @@ PyDoc_STRVAR( "\n" "Notes\n" "-----\n" - "If you want to use this function you are probably wrong.\n"); + "If you want to use this function you are probably wrong.\n" + "\n" + "Warning: Do not call this function through *unpacking. This can\n" + "potentially trick the reference checks which may allow you to get a\n" + "mutable reference to a shared string!\n" + "\n"); + +/* This is implemented as a standalone function instead of the ``tp_new`` of + ``stolenbuf`` because we need to create a function using the METH_O flag + to support Python 3.6. In python 3.6, PyCFunction calls from python code now + count the reference owned by the argument tuple. This would cause the object + to have 2 references if used with a direct call like: ``stolenbuf(a)``; + however, if called through *unpacking like ``stolenbuf(*(a,))`` it would + only have the one reference (the tuple). */ +static PyObject* +move_into_mutable_buffer(PyObject *self, PyObject *bytes_rvalue) +{ + stolenbufobject *ret; -PyTypeObject stolenbuf_type = { - PyVarObject_HEAD_INIT(NULL, 0) - "pandas.util._move.stolenbuf", /* tp_name */ - sizeof(stolenbufobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor) stolenbuf_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - &stolenbuf_as_buffer, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | - Py_TPFLAGS_HAVE_NEWBUFFER | - Py_TPFLAGS_HAVE_GETCHARBUFFER, /* tp_flags */ - stolenbuf_doc, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - (newfunc) stolenbuf_new, /* tp_new */ + if (!PyString_CheckExact(bytes_rvalue)) { + PyErr_SetString(PyExc_TypeError, + "stolenbuf can only steal from bytes objects"); + return NULL; + } + + if (Py_REFCNT(bytes_rvalue) != 1 || PyString_CHECK_INTERNED(bytes_rvalue)) { + /* there is a reference other than the caller's stack or the string is + interned */ + PyErr_SetObject(badmove, bytes_rvalue); + return NULL; + } + + if (!(ret = PyObject_New(stolenbufobject, &stolenbuf_type))) { + return NULL; + } + + /* store the original bytes object in a field that is not + exposed to python */ + Py_INCREF(bytes_rvalue); + ret->invalid_bytes = bytes_rvalue; + return (PyObject*) ret; +} + +PyMethodDef methods[] = { + {"move_into_mutable_buffer", + (PyCFunction) move_into_mutable_buffer, + METH_O, + move_into_mutable_buffer_doc}, + {NULL}, }; #define MODULE_NAME "pandas.util._move" @@ -212,6 +201,7 @@ PyModuleDef _move_module = { MODULE_NAME, NULL, -1, + methods, }; #endif /* !COMPILING_IN_PY2 */ @@ -223,7 +213,7 @@ PyDoc_STRVAR( "Parameters\n" "----------\n" "data : any\n" - " The data which was passed to ``_move_into_mutable_buffer``.\n" + " The data which was passed to ``move_into_mutable_buffer``.\n" "\n" "See Also\n" "--------\n" @@ -254,14 +244,14 @@ init_move(void) #if !COMPILING_IN_PY2 if (!(m = PyModule_Create(&_move_module))) #else - if (!(m = Py_InitModule(MODULE_NAME, NULL))) + if (!(m = Py_InitModule(MODULE_NAME, methods))) #endif /* !COMPILING_IN_PY2 */ { return ERROR_RETURN; } if (PyModule_AddObject(m, - "move_into_mutable_buffer", + "stolenbuf", (PyObject*) &stolenbuf_type)) { Py_DECREF(m); return ERROR_RETURN;