Reject interned strings when moving bytes into a mutable buffer.

pandas/util/move.c: stolenbuf_new() now sets BadMove when the object is
interned, in addition to the existing "refcount != 1" check. On non-py2
builds PyString_CHECK_INTERNED is defined as 0, so only the refcount check
applies there.

pandas/tests/test_util.py: adds test_interned (skipped on Python 3), which
checks that an interned string with a single reference is still rejected
with BadMove by move_into_mutable_buffer.

NOTE(review): line breaks and indentation below were reconstructed from a
whitespace-collapsed copy of this patch; confirm against the target files
before applying.

diff --git a/pandas/tests/test_util.py b/pandas/tests/test_util.py
index 9193880df7feb..f5828dab21e37 100644
--- a/pandas/tests/test_util.py
+++ b/pandas/tests/test_util.py
@@ -2,6 +2,9 @@
 import nose
 
 from collections import OrderedDict
+import sys
+import unittest
+from uuid import uuid4
 from pandas.util._move import move_into_mutable_buffer, BadMove
 from pandas.util.decorators import deprecate_kwarg
 from pandas.util.validators import (validate_args, validate_kwargs,
@@ -325,6 +328,46 @@ def test_exactly_one_ref(self):
         # materialize as bytearray to show that it is mutable
         self.assertEqual(bytearray(as_stolen_buf), b'test')
 
+    @unittest.skipIf(
+        sys.version_info[0] > 2,
+        'bytes objects cannot be interned in py3',
+    )
+    def test_interned(self):
+        salt = uuid4().hex
+
+        def make_string():
+            # We need to actually create a new string so that it has refcount
+            # one. We use a uuid so that we know the string could not already
+            # be in the intern table.
+            return ''.join(('testing: ', salt))
+
+        # This should work, the string has one reference on the stack.
+        move_into_mutable_buffer(make_string())
+
+        refcount = [None]  # nonlocal
+
+        def ref_capture(ob):
+            # Subtract two because those are the references owned by this
+            # frame:
+            #   1. The local variables of this stack frame.
+            #   2. The python data stack of this stack frame.
+            refcount[0] = sys.getrefcount(ob) - 2
+            return ob
+
+        with tm.assertRaises(BadMove):
+            # If we intern the string it will still have one reference but now
+            # it is in the intern table so if other people intern the same
+            # string while the mutable buffer holds the first string they will
+            # be the same instance.
+            move_into_mutable_buffer(ref_capture(intern(make_string())))  # noqa
+
+        self.assertEqual(
+            refcount[0],
+            1,
+            msg='The BadMove was probably raised for refcount reasons instead'
+            ' of interning reasons',
+        )
+
 
 def test_numpy_errstate_is_default():
     # The defaults since numpy 1.6.0
diff --git a/pandas/util/move.c b/pandas/util/move.c
index 68fcad793e16c..fb918c302b100 100644
--- a/pandas/util/move.c
+++ b/pandas/util/move.c
@@ -7,6 +7,9 @@
 #define PyString_CheckExact PyBytes_CheckExact
 #define PyString_AS_STRING PyBytes_AS_STRING
 #define PyString_GET_SIZE PyBytes_GET_SIZE
+
+/* in python 3, we cannot intern bytes objects so this is always false */
+#define PyString_CHECK_INTERNED(cs) 0
 #endif /* !COMPILING_IN_PY2 */
 
 #ifndef Py_TPFLAGS_HAVE_GETCHARBUFFER
@@ -113,8 +116,9 @@ stolenbuf_new(PyObject *self, PyObject *args, PyObject *kwargs)
         return NULL;
     }
 
-    if (Py_REFCNT(bytes_rvalue) != 1) {
-        /* there is a reference other than the caller's stack */
+    if (Py_REFCNT(bytes_rvalue) != 1 || PyString_CHECK_INTERNED(bytes_rvalue)) {
+        /* there is a reference other than the caller's stack or the string is
+           interned */
         PyErr_SetObject(badmove, bytes_rvalue);
         return NULL;
     }