From d1e8b1b2654b878619638003aa1f90677675725f Mon Sep 17 00:00:00 2001 From: Joe Jevnik Date: Fri, 18 Nov 2016 20:55:50 -0500 Subject: [PATCH] BUG: Fix move_into_mutable_buffer for python 3.6. In python 3.6, the CALL_FUNCTION handling was updated. One change is that when calling a C function from a python function python now counts the reference owned by the argument tuple. This means that move was always seeing objects with two references instead of the expected one. Python 3.6 also removed a copy in the argument tuple when *unpacking functions. This means that if a user does: tuple = (create_string(),) move_into_mutable_buffer(*tuple) where create_string() creates a string object with one reference then we will fail to raise a BadMove even though the user could later retrieve that string with tuple[0]. There is no way to detect this case so this patch adds a warning to the docstring advising against star unpacking. --- pandas/tests/test_util.py | 10 ++- pandas/util/move.c | 170 ++++++++++++++++++-------------------- 2 files changed, 89 insertions(+), 91 deletions(-) diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py index ee33e24c7f6c4..cb12048676d26 100644 --- a/pandas/tests/test_util.py +++ b/pandas/tests/test_util.py @@ -5,7 +5,7 @@ import sys import unittest from uuid import uuid4 -from pandas.util._move import move_into_mutable_buffer, BadMove +from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf from pandas.util.decorators import deprecate_kwarg from pandas.util.validators import (validate_args, validate_kwargs, validate_args_and_kwargs) @@ -299,6 +299,14 @@ def test_validation(self): class TestMove(tm.TestCase): + def test_cannot_create_instance_of_stolenbuffer(self): + """Stolen buffers need to be created through the smart constructor + ``move_into_mutable_buffer`` which has a bunch of checks in it. + """ + msg = "cannot create 'pandas.util._move.stolenbuf' instances" + with tm.assertRaisesRegexp(TypeError, msg): + stolenbuf() + def test_more_than_one_ref(self): """Test case for when we try to use ``move_into_mutable_buffer`` when the object being moved has other references. diff --git a/pandas/util/move.c b/pandas/util/move.c index fb918c302b100..9a8af5bbfbdf6 100644 --- a/pandas/util/move.c +++ b/pandas/util/move.c @@ -88,54 +88,37 @@ PyBufferProcs stolenbuf_as_buffer = { #endif /* COMPILING_IN_PY2 */ -static PyObject * -stolenbuf_new(PyObject *self, PyObject *args, PyObject *kwargs) -{ - stolenbufobject *ret; - PyObject *bytes_rvalue; - - if (kwargs && PyDict_Size(kwargs)) { - PyErr_SetString(PyExc_TypeError, - "stolenbuf does not accept keyword arguments"); - return NULL; - } - - if (PyTuple_GET_SIZE(args) != 1) { - PyErr_SetString(PyExc_TypeError, - "stolenbuf requires exactly 1 positional argument"); - return NULL; - - } - - /* pull out the single, positional argument */ - bytes_rvalue = PyTuple_GET_ITEM(args, 0); - - if (!PyString_CheckExact(bytes_rvalue)) { - PyErr_SetString(PyExc_TypeError, - "stolenbuf can only steal from bytes objects"); - return NULL; - } - - if (Py_REFCNT(bytes_rvalue) != 1 || PyString_CHECK_INTERNED(bytes_rvalue)) { - /* there is a reference other than the caller's stack or the string is - interned */ - PyErr_SetObject(badmove, bytes_rvalue); - return NULL; - } - - if (!(ret = PyObject_New(stolenbufobject, &stolenbuf_type))) { - return NULL; - } +PyDoc_STRVAR(stolenbuf_doc, + "A buffer that is wrapping a stolen bytes object's buffer."); - /* store the original bytes object in a field that is not - exposed to python */ - Py_INCREF(bytes_rvalue); - ret->invalid_bytes = bytes_rvalue; - return (PyObject*) ret; -} +PyTypeObject stolenbuf_type = { + PyVarObject_HEAD_INIT(NULL, 0) + "pandas.util._move.stolenbuf", /* tp_name */ + sizeof(stolenbufobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + (destructor) stolenbuf_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_reserved */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + 0, /* tp_getattro */ + 0, /* tp_setattro */ + &stolenbuf_as_buffer, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | + Py_TPFLAGS_HAVE_NEWBUFFER | + Py_TPFLAGS_HAVE_GETCHARBUFFER, /* tp_flags */ + stolenbuf_doc, /* tp_doc */ +}; PyDoc_STRVAR( - stolenbuf_doc, + move_into_mutable_buffer_doc, "Moves a bytes object that is about to be destroyed into a mutable buffer\n" "without copying the data.\n" "\n" @@ -159,49 +142,55 @@ PyDoc_STRVAR( "\n" "Notes\n" "-----\n" - "If you want to use this function you are probably wrong.\n"); + "If you want to use this function you are probably wrong.\n" + "\n" + "Warning: Do not call this function through *unpacking. This can\n" + "potentially trick the reference checks which may allow you to get a\n" + "mutable reference to a shared string!\n" + "\n"); + +/* This is implemented as a standalone function instead of the ``tp_new`` of + ``stolenbuf`` because we need to create a function using the METH_O flag + to support Python 3.6. In python 3.6, PyCFunction calls from python code now + count the reference owned by the argument tuple. This would cause the object + to have 2 references if used with a direct call like: ``stolenbuf(a)``; + however, if called through *unpacking like ``stolenbuf(*(a,))`` it would + only have the one reference (the tuple). */ +static PyObject* +move_into_mutable_buffer(PyObject *self, PyObject *bytes_rvalue) +{ + stolenbufobject *ret; -PyTypeObject stolenbuf_type = { - PyVarObject_HEAD_INIT(NULL, 0) - "pandas.util._move.stolenbuf", /* tp_name */ - sizeof(stolenbufobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor) stolenbuf_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - &stolenbuf_as_buffer, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | - Py_TPFLAGS_HAVE_NEWBUFFER | - Py_TPFLAGS_HAVE_GETCHARBUFFER, /* tp_flags */ - stolenbuf_doc, /* tp_doc */ - 0, /* tp_traverse */ - 0, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - 0, /* tp_methods */ - 0, /* tp_members */ - 0, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - 0, /* tp_init */ - 0, /* tp_alloc */ - (newfunc) stolenbuf_new, /* tp_new */ + if (!PyString_CheckExact(bytes_rvalue)) { + PyErr_SetString(PyExc_TypeError, + "stolenbuf can only steal from bytes objects"); + return NULL; + } + + if (Py_REFCNT(bytes_rvalue) != 1 || PyString_CHECK_INTERNED(bytes_rvalue)) { + /* there is a reference other than the caller's stack or the string is + interned */ + PyErr_SetObject(badmove, bytes_rvalue); + return NULL; + } + + if (!(ret = PyObject_New(stolenbufobject, &stolenbuf_type))) { + return NULL; + } + + /* store the original bytes object in a field that is not + exposed to python */ + Py_INCREF(bytes_rvalue); + ret->invalid_bytes = bytes_rvalue; + return (PyObject*) ret; +} + +PyMethodDef methods[] = { + {"move_into_mutable_buffer", + (PyCFunction) move_into_mutable_buffer, + METH_O, + move_into_mutable_buffer_doc}, + {NULL}, }; #define MODULE_NAME "pandas.util._move" @@ -212,6 +201,7 @@ PyModuleDef _move_module = { MODULE_NAME, NULL, -1, + methods, }; #endif /* !COMPILING_IN_PY2 */ @@ -223,7 +213,7 @@ PyDoc_STRVAR( "Parameters\n" "----------\n" "data : any\n" - " The data which was passed to ``_move_into_mutable_buffer``.\n" + " The data which was passed to ``move_into_mutable_buffer``.\n" "\n" "See Also\n" "--------\n" @@ -254,14 +244,14 @@ init_move(void) #if !COMPILING_IN_PY2 if (!(m = PyModule_Create(&_move_module))) #else - if (!(m = Py_InitModule(MODULE_NAME, NULL))) + if (!(m = Py_InitModule(MODULE_NAME, methods))) #endif /* !COMPILING_IN_PY2 */ { return ERROR_RETURN; } if (PyModule_AddObject(m, - "move_into_mutable_buffer", + "stolenbuf", (PyObject*) &stolenbuf_type)) { Py_DECREF(m); return ERROR_RETURN;