Skip to content

Commit 726efc7

Browse files
lllllllllljreback
authored andcommitted
BUG: don't allow users to move from an interned string (pandas-dev#14494)
1 parent 4814823 commit 726efc7

File tree

2 files changed

+49
-2
lines changed

2 files changed

+49
-2
lines changed

pandas/tests/test_util.py

+43
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
import nose
33

44
from collections import OrderedDict
5+
import sys
6+
import unittest
7+
from uuid import uuid4
58
from pandas.util._move import move_into_mutable_buffer, BadMove
69
from pandas.util.decorators import deprecate_kwarg
710
from pandas.util.validators import (validate_args, validate_kwargs,
@@ -325,6 +328,46 @@ def test_exactly_one_ref(self):
325328
# materialize as bytearray to show that it is mutable
326329
self.assertEqual(bytearray(as_stolen_buf), b'test')
327330

331+
@unittest.skipIf(
332+
sys.version_info[0] > 2,
333+
'bytes objects cannot be interned in py3',
334+
)
335+
def test_interned(self):
336+
salt = uuid4().hex
337+
338+
def make_string():
339+
# We need to actually create a new string so that it has refcount
340+
# one. We use a uuid so that we know the string could not already
341+
# be in the intern table.
342+
return ''.join(('testing: ', salt))
343+
344+
# This should work, the string has one reference on the stack.
345+
move_into_mutable_buffer(make_string())
346+
347+
refcount = [None] # nonlocal
348+
349+
def ref_capture(ob):
350+
# Subtract two because those are the references owned by this
351+
# frame:
352+
# 1. The local variables of this stack frame.
353+
# 2. The python data stack of this stack frame.
354+
refcount[0] = sys.getrefcount(ob) - 2
355+
return ob
356+
357+
with tm.assertRaises(BadMove):
358+
# If we intern the string it will still have one reference but now
359+
# it is in the intern table so if other people intern the same
360+
# string while the mutable buffer holds the first string they will
361+
# be the same instance.
362+
move_into_mutable_buffer(ref_capture(intern(make_string()))) # noqa
363+
364+
self.assertEqual(
365+
refcount[0],
366+
1,
367+
msg='The BadMove was probably raised for refcount reasons instead'
368+
' of interning reasons',
369+
)
370+
328371

329372
def test_numpy_errstate_is_default():
330373
# The defaults since numpy 1.6.0

pandas/util/move.c

+6-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
#define PyString_CheckExact PyBytes_CheckExact
88
#define PyString_AS_STRING PyBytes_AS_STRING
99
#define PyString_GET_SIZE PyBytes_GET_SIZE
10+
11+
/* in python 3, we cannot intern bytes objects so this is always false */
12+
#define PyString_CHECK_INTERNED(cs) 0
1013
#endif /* !COMPILING_IN_PY2 */
1114

1215
#ifndef Py_TPFLAGS_HAVE_GETCHARBUFFER
@@ -113,8 +116,9 @@ stolenbuf_new(PyObject *self, PyObject *args, PyObject *kwargs)
113116
return NULL;
114117
}
115118

116-
if (Py_REFCNT(bytes_rvalue) != 1) {
117-
/* there is a reference other than the caller's stack */
119+
if (Py_REFCNT(bytes_rvalue) != 1 || PyString_CHECK_INTERNED(bytes_rvalue)) {
120+
/* there is a reference other than the caller's stack or the string is
121+
interned */
118122
PyErr_SetObject(badmove, bytes_rvalue);
119123
return NULL;
120124
}

0 commit comments

Comments
 (0)