From e221dd84baa887c582ad0e45bdbf2777540d3222 Mon Sep 17 00:00:00 2001
From: mattip <matti.picus@gmail.com>
Date: Thu, 27 Apr 2017 08:13:15 +0300
Subject: [PATCH 1/3] COMPAT: add refcheck kwarg and percolate out to app-level
 for PyPy

---
 pandas/_libs/hashtable.pxd                 |  8 +--
 pandas/_libs/hashtable.pyx                 |  8 +--
 pandas/_libs/hashtable_class_helper.pxi.in | 75 ++++++++++++----------
 pandas/_libs/intervaltree.pxi.in           | 51 ++++++++-------
 pandas/core/algorithms.py                  |  4 +-
 5 files changed, 78 insertions(+), 68 deletions(-)

diff --git a/pandas/_libs/hashtable.pxd b/pandas/_libs/hashtable.pxd
index 9b352ae1c003b..65d8c5eff1b7b 100644
--- a/pandas/_libs/hashtable.pxd
+++ b/pandas/_libs/hashtable.pxd
@@ -53,7 +53,7 @@ cdef class Int64Vector:
     cdef Int64VectorData *data
     cdef ndarray ao
 
-    cdef resize(self)
-    cpdef to_array(self)
-    cdef inline void append(self, int64_t x)
-    cdef extend(self, int64_t[:] x)
+    cdef resize(self, refcheck=*)
+    cpdef to_array(self, refcheck=*)
+    cdef inline void append(self, int64_t x, refcheck=*)
+    cdef extend(self, int64_t[:] x, refcheck=*)
diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
index c8aedcef77502..4ad658c6b16c4 100644
--- a/pandas/_libs/hashtable.pyx
+++ b/pandas/_libs/hashtable.pyx
@@ -101,14 +101,14 @@ cdef class Int64Factorizer:
                   na_sentinel=-1, check_null=True):
         labels = self.table.get_labels(values, self.uniques,
                                        self.count, na_sentinel,
-                                       check_null)
+                                       check_null, refcheck=False)
 
         # sort on
         if sort:
             if labels.dtype != np.intp:
                 labels = labels.astype(np.intp)
 
-            sorter = self.uniques.to_array().argsort()
+            sorter = self.uniques.to_array(refcheck=False).argsort()
             reverse_indexer = np.empty(len(sorter), dtype=np.intp)
             reverse_indexer.put(sorter, np.arange(len(sorter)))
 
@@ -142,12 +142,12 @@ def unique_label_indices(ndarray[int64_t, ndim=1] labels):
             if ret != 0:
                 if needs_resize(ud):
                     with gil:
-                        idx.resize()
+                        idx.resize(refcheck=False)
                 append_data_int64(ud, i)
 
     kh_destroy_int64(table)
 
-    arr = idx.to_array()
+    arr = idx.to_array(refcheck=False)
     arr = arr[labels[arr].argsort()]
 
     return arr[1:] if arr.size != 0 and labels[arr[0]] == -1 else arr
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 3ce82dace40a9..f7e18178d80db 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -85,10 +85,11 @@ cdef class {{name}}Vector:
         self.ao = np.empty(self.data.m, dtype={{idtype}})
         self.data.data = <{{arg}}*> self.ao.data
 
-    cdef resize(self):
-        self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
-        self.ao.resize(self.data.m)
+    cdef resize(self, refcheck=True):
+        m = max(self.data.m * 4, _INIT_VEC_CAP)
+        self.ao.resize(m, refcheck=refcheck) # could raise, change m later
         self.data.data = <{{arg}}*> self.ao.data
+        self.data.m = m
 
     def __dealloc__(self):
         if self.data is not NULL:
@@ -98,21 +99,21 @@ cdef class {{name}}Vector:
     def __len__(self):
         return self.data.n
 
-    cpdef to_array(self):
-        self.ao.resize(self.data.n)
+    cpdef to_array(self, refcheck=True):
+        self.ao.resize(self.data.n, refcheck=refcheck)
         self.data.m = self.data.n
+        self.data.data = <{{arg}}*> self.ao.data
         return self.ao
 
-    cdef inline void append(self, {{arg}} x):
+    cdef inline void append(self, {{arg}} x, refcheck=True):
 
         if needs_resize(self.data):
-            self.resize()
-
+            self.resize(refcheck=refcheck)
         append_data_{{dtype}}(self.data, x)
 
-    cdef extend(self, {{arg}}[:] x):
+    cdef extend(self, {{arg}}[:] x, refcheck=True):
         for i in range(len(x)):
-            self.append(x[i])
+            self.append(x[i], refcheck=refcheck)
 
 {{endfor}}
 
@@ -130,11 +131,12 @@ cdef class StringVector:
         self.data.m = _INIT_VEC_CAP
         self.data.data = <char **> malloc(self.data.m * sizeof(char *))
 
-    cdef resize(self):
+    cdef resize(self, refcheck=True):
         cdef:
             char **orig_data
             size_t i, m
 
+        # refcheck ignored, for compatibility only
         m = self.data.m
         self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
 
@@ -154,12 +156,13 @@ cdef class StringVector:
     def __len__(self):
         return self.data.n
 
-    def to_array(self):
+    def to_array(self, refcheck=True):
         cdef:
             ndarray ao
             size_t n
             object val
 
+        # refcheck ignored, for compatibility only
         ao = np.empty(self.data.n, dtype=np.object)
         for i in range(self.data.n):
             val = self.data.data[i]
@@ -167,10 +170,10 @@ cdef class StringVector:
         self.data.m = self.data.n
         return ao
 
-    cdef inline void append(self, char * x):
+    cdef inline void append(self, char * x, refcheck=True):
 
         if needs_resize(self.data):
-            self.resize()
+            self.resize(refcheck=refcheck)
 
         append_data_string(self.data, x)
 
@@ -191,18 +194,18 @@ cdef class ObjectVector:
     def __len__(self):
         return self.n
 
-    cdef inline append(self, object o):
+    cdef inline append(self, object o, refcheck=True):
         if self.n == self.m:
             self.m = max(self.m * 2, _INIT_VEC_CAP)
-            self.ao.resize(self.m)
+            self.ao.resize(self.m, refcheck=refcheck)
             self.data = <PyObject**> self.ao.data
 
         Py_INCREF(o)
         self.data[self.n] = <PyObject*> o
         self.n += 1
 
-    def to_array(self):
-        self.ao.resize(self.n)
+    def to_array(self, refcheck=True):
+        self.ao.resize(self.n, refcheck=refcheck)
         self.m = self.n
         return self.ao
 
@@ -324,13 +327,13 @@ cdef class {{name}}HashTable(HashTable):
 
     def factorize(self, {{dtype}}_t values):
         uniques = {{name}}Vector()
-        labels = self.get_labels(values, uniques, 0, 0)
-        return uniques.to_array(), labels
+        labels = self.get_labels(values, uniques, 0, 0, refcheck=False)
+        return uniques.to_array(refcheck=False), labels
 
     @cython.boundscheck(False)
     def get_labels(self, {{dtype}}_t[:] values, {{name}}Vector uniques,
                    Py_ssize_t count_prior, Py_ssize_t na_sentinel,
-                   bint check_null=True):
+                   bint check_null=True, bint refcheck=True):
         cdef:
             Py_ssize_t i, n = len(values)
             int64_t[:] labels
@@ -362,7 +365,7 @@ cdef class {{name}}HashTable(HashTable):
 
                     if needs_resize(ud):
                         with gil:
-                            uniques.resize()
+                            uniques.resize(refcheck=refcheck)
                     append_data_{{dtype}}(ud, val)
                     labels[i] = count
                     count += 1
@@ -405,12 +408,12 @@ cdef class {{name}}HashTable(HashTable):
 
                     if needs_resize(ud):
                         with gil:
-                            uniques.resize()
+                            uniques.resize(refcheck=False)
                     append_data_{{dtype}}(ud, val)
                     labels[i] = count
                     count += 1
 
-        arr_uniques = uniques.to_array()
+        arr_uniques = uniques.to_array(refcheck=False)
 
         return np.asarray(labels), arr_uniques
 
@@ -438,13 +441,13 @@ cdef class {{name}}HashTable(HashTable):
                         kh_put_{{dtype}}(self.table, val, &ret)
                         if needs_resize(ud):
                             with gil:
-                                uniques.resize()
+                                uniques.resize(refcheck=False)
                         append_data_{{dtype}}(ud, val)
                 elif not seen_na:
                     seen_na = 1
                     if needs_resize(ud):
                         with gil:
-                            uniques.resize()
+                            uniques.resize(refcheck=False)
                     append_data_{{dtype}}(ud, NAN)
                 {{else}}
                 k = kh_get_{{dtype}}(self.table, val)
@@ -452,11 +455,11 @@ cdef class {{name}}HashTable(HashTable):
                     kh_put_{{dtype}}(self.table, val, &ret)
                     if needs_resize(ud):
                         with gil:
-                            uniques.resize()
+                            uniques.resize(refcheck=False)
                     append_data_{{dtype}}(ud, val)
                 {{endif}}
 
-        return uniques.to_array()
+        return uniques.to_array(refcheck=False)
 
 {{endfor}}
 
@@ -571,12 +574,12 @@ cdef class StringHashTable(HashTable):
         uniques = ObjectVector()
         for i in range(count):
             uniques.append(values[uindexer[i]])
-        return uniques.to_array()
+        return uniques.to_array(refcheck=False)
 
     def factorize(self, ndarray[object] values):
         uniques = ObjectVector()
-        labels = self.get_labels(values, uniques, 0, 0)
-        return uniques.to_array(), labels
+        labels = self.get_labels(values, uniques, 0, 0, refcheck=False)
+        return uniques.to_array(refcheck=False), labels
 
     @cython.boundscheck(False)
     def lookup(self, ndarray[object] values):
@@ -642,7 +645,7 @@ cdef class StringHashTable(HashTable):
     @cython.boundscheck(False)
     def get_labels(self, ndarray[object] values, ObjectVector uniques,
                    Py_ssize_t count_prior, int64_t na_sentinel,
-                   bint check_null=1):
+                   bint check_null=1, bint refcheck=1):
         cdef:
             Py_ssize_t i, n = len(values)
             int64_t[:] labels
@@ -654,6 +657,8 @@ cdef class StringHashTable(HashTable):
             char **vecs
             khiter_t k
 
+        # refcheck ignored, for compatibility only
+
         # these by-definition *must* be strings
         labels = np.zeros(n, dtype=np.int64)
         uindexer = np.empty(n, dtype=np.int64)
@@ -811,11 +816,11 @@ cdef class PyObjectHashTable(HashTable):
                 seen_na = 1
                 uniques.append(nan)
 
-        return uniques.to_array()
+        return uniques.to_array(refcheck=False)
 
     def get_labels(self, ndarray[object] values, ObjectVector uniques,
                    Py_ssize_t count_prior, int64_t na_sentinel,
-                   bint check_null=True):
+                   bint check_null=True, bint refcheck=True):
         cdef:
             Py_ssize_t i, n = len(values)
             int64_t[:] labels
@@ -824,6 +829,8 @@ cdef class PyObjectHashTable(HashTable):
             object val
             khiter_t k
 
+        # refcheck ignored, for compatibility only
+
         labels = np.empty(n, dtype=np.int64)
 
         for i in range(n):
diff --git a/pandas/_libs/intervaltree.pxi.in b/pandas/_libs/intervaltree.pxi.in
index 4fa0d6d156fa2..09b71d6ef6fdf 100644
--- a/pandas/_libs/intervaltree.pxi.in
+++ b/pandas/_libs/intervaltree.pxi.in
@@ -12,7 +12,7 @@ cimport cython
 cimport numpy as cnp
 cnp.import_array()
 
-from hashtable cimport Int64Vector, Int64VectorData
+from hashtable cimport Int64Vector
 
 
 ctypedef fused scalar_t:
@@ -91,10 +91,10 @@ cdef class IntervalTree(IntervalMixin):
         the given scalar key
         """
         result = Int64Vector()
-        self.root.query(result, key)
+        self.root.query(result, key, False)
         if not result.data.n:
             raise KeyError(key)
-        return result.to_array()
+        return result.to_array(refcheck=False)
 
     def _get_partial_overlap(self, key_left, key_right, side):
         """Return all positions corresponding to intervals with the given side
@@ -137,14 +137,14 @@ cdef class IntervalTree(IntervalMixin):
         result = Int64Vector()
         old_len = 0
         for i in range(len(target)):
-            self.root.query(result, target[i])
+            self.root.query(result, target[i], False)
             if result.data.n == old_len:
-                result.append(-1)
+                result.append(-1, refcheck=False)
             elif result.data.n > old_len + 1:
                 raise KeyError(
                     'indexer does not intersect a unique set of intervals')
             old_len = result.data.n
-        return result.to_array()
+        return result.to_array(refcheck=False)
 
     def get_indexer_non_unique(self, scalar_t[:] target):
         """Return the positions corresponding to intervals that overlap with
@@ -159,12 +159,12 @@ cdef class IntervalTree(IntervalMixin):
         missing = Int64Vector()
         old_len = 0
         for i in range(len(target)):
-            self.root.query(result, target[i])
+            self.root.query(result, target[i], False)
             if result.data.n == old_len:
-                result.append(-1)
-                missing.append(i)
+                result.append(-1, refcheck=False)
+                missing.append(i, refcheck=False)
             old_len = result.data.n
-        return result.to_array(), missing.to_array()
+        return result.to_array(refcheck=False), missing.to_array(refcheck=False)
 
     def __repr__(self):
         return ('<IntervalTree[{dtype},{closed}]: '
@@ -298,15 +298,15 @@ cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode:
 
         for i in range(self.n_elements):
             if right[i] {{cmp_right_converse}} self.pivot:
-                left_ind.append(i)
+                left_ind.append(i, refcheck=False)
             elif self.pivot {{cmp_left_converse}} left[i]:
-                right_ind.append(i)
+                right_ind.append(i, refcheck=False)
             else:
-                overlapping_ind.append(i)
+                overlapping_ind.append(i, refcheck=False)
 
-        return (left_ind.to_array(),
-                right_ind.to_array(),
-                overlapping_ind.to_array())
+        return (left_ind.to_array(refcheck=False),
+                right_ind.to_array(refcheck=False),
+                overlapping_ind.to_array(refcheck=False))
 
     cdef new_child_node(self,
                         ndarray[{{dtype}}_t, ndim=1] left,
@@ -321,12 +321,15 @@ cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode:
         return {{dtype_title}}Closed{{closed_title}}IntervalNode(
             left, right, indices, self.leaf_size)
 
+    # XXX older cython does not allow kwarg here, would be better as
+    # refcheck=True
     @cython.wraparound(False)
     @cython.boundscheck(False)
     @cython.initializedcheck(False)
-    cpdef query(self, Int64Vector result, scalar_t point):
+    cpdef query(self, Int64Vector result, scalar_t point, refcheck):
         """Recursively query this node and its sub-nodes for intervals that
-        overlap with the query point.
+        overlap with the query point. refcheck should be False, it is passed
+        to result.append, which uses NumPy resize(refcheck=refcheck)
         """
         cdef:
             int64_t[:] indices
@@ -339,7 +342,7 @@ cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode:
             # search.
             for i in range(self.n_elements):
                 if self.left[i] {{cmp_left}} point {{cmp_right}} self.right[i]:
-                    result.append(self.indices[i])
+                    result.append(self.indices[i], refcheck=refcheck)
         else:
             # There are child nodes. Based on comparing our query to the pivot,
             # look at the center values, then go to the relevant child.
@@ -349,20 +352,20 @@ cdef class {{dtype_title}}Closed{{closed_title}}IntervalNode:
                 for i in range(self.n_center):
                     if not values[i] {{cmp_left}} point:
                         break
-                    result.append(indices[i])
+                    result.append(indices[i], refcheck=refcheck)
                 if point {{cmp_right}} self.left_node.max_right:
-                    self.left_node.query(result, point)
+                    self.left_node.query(result, point, refcheck)
             elif point > self.pivot:
                 values = self.center_right_values
                 indices = self.center_right_indices
                 for i in range(self.n_center - 1, -1, -1):
                     if not point {{cmp_right}} values[i]:
                         break
-                    result.append(indices[i])
+                    result.append(indices[i], refcheck=refcheck)
                 if self.right_node.min_left {{cmp_left}} point:
-                    self.right_node.query(result, point)
+                    self.right_node.query(result, point, refcheck)
             else:
-                result.extend(self.center_left_indices)
+                result.extend(self.center_left_indices, refcheck=refcheck)
 
     def __repr__(self):
         if self.is_leaf_node:
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index 8437861bea19e..de7635512ec25 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -551,10 +551,10 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
     table = hash_klass(size_hint or len(values))
     uniques = vec_klass()
     check_nulls = not is_integer_dtype(original)
-    labels = table.get_labels(values, uniques, 0, na_sentinel, check_nulls)
+    labels = table.get_labels(values, uniques, 0, na_sentinel, check_nulls, refcheck=False)
 
     labels = _ensure_platform_int(labels)
-    uniques = uniques.to_array()
+    uniques = uniques.to_array(refcheck=False)
 
     if sort and len(uniques) > 0:
         uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel,

From 4acc4cd356f3c4ebc29aab7b0dc8463782751ca4 Mon Sep 17 00:00:00 2001
From: mattip <matti.picus@gmail.com>
Date: Tue, 2 May 2017 07:32:30 +0300
Subject: [PATCH 2/3] COMPAT: add refcheck kwarg and percolate out to app-level
 for PyPy

---
 pandas/_libs/hashtable.pyx                 |  5 +++--
 pandas/_libs/hashtable_class_helper.pxi.in | 13 +++++--------
 pandas/core/algorithms.py                  |  3 ++-
 pandas/core/reshape/merge.py               |  2 +-
 pandas/tests/test_algos.py                 |  4 ++--
 5 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
index 4ad658c6b16c4..c77b470aab11e 100644
--- a/pandas/_libs/hashtable.pyx
+++ b/pandas/_libs/hashtable.pyx
@@ -65,13 +65,14 @@ cdef class Factorizer:
         array([ 0,  1, 20])
         """
         labels = self.table.get_labels(values, self.uniques,
-                                       self.count, na_sentinel, check_null)
+                                       self.count, na_sentinel, 
+                                       check_null, refcheck=False)
         mask = (labels == na_sentinel)
         # sort on
         if sort:
             if labels.dtype != np.intp:
                 labels = labels.astype(np.intp)
-            sorter = self.uniques.to_array().argsort()
+            sorter = self.uniques.to_array(refcheck=False).argsort()
             reverse_indexer = np.empty(len(sorter), dtype=np.intp)
             reverse_indexer.put(sorter, np.arange(len(sorter)))
             labels = reverse_indexer.take(labels, mode='clip')
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index f7e18178d80db..5c8cb76571267 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -573,7 +573,7 @@ cdef class StringHashTable(HashTable):
         # uniques
         uniques = ObjectVector()
         for i in range(count):
-            uniques.append(values[uindexer[i]])
+            uniques.append(values[uindexer[i]], refcheck=False)
         return uniques.to_array(refcheck=False)
 
     def factorize(self, ndarray[object] values):
@@ -657,7 +657,6 @@ cdef class StringHashTable(HashTable):
             char **vecs
             khiter_t k
 
-        # refcheck ignored, for compatibility only
 
         # these by-definition *must* be strings
         labels = np.zeros(n, dtype=np.int64)
@@ -697,7 +696,7 @@ cdef class StringHashTable(HashTable):
 
         # uniques
         for i in range(count):
-            uniques.append(values[uindexer[i]])
+            uniques.append(values[uindexer[i]], refcheck=refcheck)
 
         return np.asarray(labels)
 
@@ -811,10 +810,10 @@ cdef class PyObjectHashTable(HashTable):
                 k = kh_get_pymap(self.table, <PyObject*>val)
                 if k == self.table.n_buckets:
                     kh_put_pymap(self.table, <PyObject*>val, &ret)
-                    uniques.append(val)
+                    uniques.append(val, refcheck=False)
             elif not seen_na:
                 seen_na = 1
-                uniques.append(nan)
+                uniques.append(nan, refcheck=False)
 
         return uniques.to_array(refcheck=False)
 
@@ -829,8 +828,6 @@ cdef class PyObjectHashTable(HashTable):
             object val
             khiter_t k
 
-        # refcheck ignored, for compatibility only
-
         labels = np.empty(n, dtype=np.int64)
 
         for i in range(n):
@@ -848,7 +845,7 @@ cdef class PyObjectHashTable(HashTable):
             else:
                 k = kh_put_pymap(self.table, <PyObject*>val, &ret)
                 self.table.vals[k] = count
-                uniques.append(val)
+                uniques.append(val, refcheck=refcheck)
                 labels[i] = count
                 count += 1
 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index de7635512ec25..78747225c1dc4 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -551,7 +551,8 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
     table = hash_klass(size_hint or len(values))
     uniques = vec_klass()
     check_nulls = not is_integer_dtype(original)
-    labels = table.get_labels(values, uniques, 0, na_sentinel, check_nulls, refcheck=False)
+    labels = table.get_labels(values, uniques, 0, na_sentinel,
+                              check_nulls, refcheck=False)
 
     labels = _ensure_platform_int(labels)
     uniques = uniques.to_array(refcheck=False)
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 1ca3786ecc174..de020b22143da 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1407,7 +1407,7 @@ def _factorize_keys(lk, rk, sort=True):
     count = rizer.get_count()
 
     if sort:
-        uniques = rizer.uniques.to_array()
+        uniques = rizer.uniques.to_array(refcheck=False)
         llab, rlab = _sort_labels(uniques, llab, rlab)
 
     # NA group
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index df267f2374051..54d913879f556 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -1069,8 +1069,8 @@ def _test_vector_resize(htable, uniques, dtype, nvals):
             # get_labels appends to the vector
             htable.get_labels(vals[:nvals], uniques, 0, -1)
             # to_array resizes the vector
-            uniques.to_array()
-            htable.get_labels(vals, uniques, 0, -1)
+            uniques.to_array(refcheck=False)
+            htable.get_labels(vals, uniques, 0, -1, refcheck=False)
 
         test_cases = [
             (hashtable.PyObjectHashTable, hashtable.ObjectVector, 'object'),

From 9aaf521233c47556620675273820f16be071fa95 Mon Sep 17 00:00:00 2001
From: mattip <matti.picus@gmail.com>
Date: Tue, 2 May 2017 18:52:27 +0300
Subject: [PATCH 3/3] TST: show how refcheck semantics leak to user space

---
 pandas/tests/test_algos.py | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 54d913879f556..5a12f3e0ed201 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -1064,25 +1064,33 @@ def test_vector_resize(self):
         # Test for memory errors after internal vector
         # reallocations (pull request #7157)
 
-        def _test_vector_resize(htable, uniques, dtype, nvals):
+        def _test_vector_resize(htable, uniques, dtype, nvals, tmp_changes):
             vals = np.array(np.random.randn(1000), dtype=dtype)
             # get_labels appends to the vector
             htable.get_labels(vals[:nvals], uniques, 0, -1)
             # to_array resizes the vector
-            uniques.to_array(refcheck=False)
+            tmp = uniques.to_array(refcheck=False)
+            oldshape = tmp.shape
             htable.get_labels(vals, uniques, 0, -1, refcheck=False)
+            with pytest.raises(ValueError):
+                # cannot resize, tmp is holding a reference
+                tmp2 = uniques.to_array() #refcheck=True is default value
+            # DANGEROUS - could change tmp
+            uniques.to_array(refcheck=False)
+            if tmp_changes:
+                assert oldshape != tmp.shape
 
         test_cases = [
-            (hashtable.PyObjectHashTable, hashtable.ObjectVector, 'object'),
-            (hashtable.StringHashTable, hashtable.ObjectVector, 'object'),
-            (hashtable.Float64HashTable, hashtable.Float64Vector, 'float64'),
-            (hashtable.Int64HashTable, hashtable.Int64Vector, 'int64'),
-            (hashtable.UInt64HashTable, hashtable.UInt64Vector, 'uint64')]
+            (hashtable.PyObjectHashTable, hashtable.ObjectVector, 'object', True),
+            (hashtable.StringHashTable, hashtable.ObjectVector, 'object', False),
+            (hashtable.Float64HashTable, hashtable.Float64Vector, 'float64', True),
+            (hashtable.Int64HashTable, hashtable.Int64Vector, 'int64', True),
+            (hashtable.UInt64HashTable, hashtable.UInt64Vector, 'uint64', True)]
 
-        for (tbl, vect, dtype) in test_cases:
+        for (tbl, vect, dtype, tmp_changes) in test_cases:
             # resizing to empty is a special case
-            _test_vector_resize(tbl(), vect(), dtype, 0)
-            _test_vector_resize(tbl(), vect(), dtype, 10)
+            _test_vector_resize(tbl(), vect(), dtype, 0, tmp_changes)
+            _test_vector_resize(tbl(), vect(), dtype, 10, tmp_changes)
 
 
 def test_quantile():