From 4f1e2aaceba1a8d251c56185de5890c15d1fca19 Mon Sep 17 00:00:00 2001
From: Matthew Kirk <matt@matthewkirk.com>
Date: Sun, 17 Dec 2017 20:38:27 -0800
Subject: [PATCH 1/2] BUG make hashtable.unique support readonly arrays

This problem was brought up in
https://github.com/pandas-dev/pandas/issues/18773 and effectively comes
down to how Cython deals with readonly arrays. While it would be ideal
for Cython to fix the underlying problem in the meantime we can rely on
this.

fix: updates one_to_hundred for hundred_elements

This is because arange(100) isn't actually 1 to 100... it's 0 to 99

docs: adds comment to fix using ndarray and fixes indenting

test: parametrize test for test_readonly_cut

doc: add new whatsnew entry for v0.23.0

fix: checkout existing upstream v0.22.0
---
 doc/source/whatsnew/v0.23.0.txt            |   2 +-
 pandas/_libs/hashtable_class_helper.pxi.in | 100 ++++++++++++---------
 pandas/tests/reshape/test_tile.py          |  14 +++
 3 files changed, 74 insertions(+), 42 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 40e1e2011479c..5c947ba3d4766 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -341,7 +341,7 @@ Reshaping
 - Bug in :func:`DataFrame.stack` which fails trying to sort mixed type levels under Python 3 (:issue:`18310`)
 - Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`)
 - Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`)
--
+- Bug in :func:`cut` which fails when using readonly arrays (:issue:`18773`)
 
 Numeric
 ^^^^^^^
diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index 6e1c4397810b7..bd9dd1f9bae37 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -255,10 +255,56 @@ dtypes = [('Float64', 'float64', 'val != val', True),
           ('UInt64', 'uint64', 'False', False),
           ('Int64', 'int64', 'val == iNaT', False)]
 
+def get_dispatch(dtypes):
+  for (name, dtype, null_condition, float_group) in dtypes:
+    unique_template = """\
+        cdef:
+           Py_ssize_t i, n = len(values)
+           int ret = 0
+           {dtype}_t val
+           khiter_t k
+           bint seen_na = 0
+           {name}Vector uniques = {name}Vector()
+           {name}VectorData *ud
+
+        ud = uniques.data
+
+        with nogil:
+            for i in range(n):
+                val = values[i]
+                IF {float_group}:
+                  if val == val:
+                      k = kh_get_{dtype}(self.table, val)
+                      if k == self.table.n_buckets:
+                          kh_put_{dtype}(self.table, val, &ret)
+                          if needs_resize(ud):
+                              with gil:
+                                  uniques.resize()
+                          append_data_{dtype}(ud, val)
+                  elif not seen_na:
+                      seen_na = 1
+                      if needs_resize(ud):
+                          with gil:
+                              uniques.resize()
+                      append_data_{dtype}(ud, NAN)
+                ELSE:
+                  k = kh_get_{dtype}(self.table, val)
+                  if k == self.table.n_buckets:
+                      kh_put_{dtype}(self.table, val, &ret)
+                      if needs_resize(ud):
+                          with gil:
+                              uniques.resize()
+                      append_data_{dtype}(ud, val)
+        return uniques.to_array()
+      """
+
+    unique_template = unique_template.format(name=name, dtype=dtype, null_condition=null_condition, float_group=float_group)
+
+    yield (name, dtype, null_condition, float_group, unique_template)
 }}
 
 
-{{for name, dtype, null_condition, float_group in dtypes}}
+{{for name, dtype, null_condition, float_group, unique_template in get_dispatch(dtypes)}}
 
 cdef class {{name}}HashTable(HashTable):
 
@@ -450,48 +496,20 @@ cdef class {{name}}HashTable(HashTable):
         return np.asarray(labels), arr_uniques
 
     @cython.boundscheck(False)
-    def unique(self, {{dtype}}_t[:] values):
-        cdef:
-            Py_ssize_t i, n = len(values)
-            int ret = 0
-            {{dtype}}_t val
-            khiter_t k
-            bint seen_na = 0
-            {{name}}Vector uniques = {{name}}Vector()
-            {{name}}VectorData *ud
+    def unique(self, ndarray[{{dtype}}_t, ndim=1] values):
+        if values.flags.writeable:
+          # If the value is writeable (mutable) then use memview
+          return self.unique_memview(values)
 
-        ud = uniques.data
-
-        with nogil:
-            for i in range(n):
-                val = values[i]
-
-                {{if float_group}}
-                if val == val:
-                    k = kh_get_{{dtype}}(self.table, val)
-                    if k == self.table.n_buckets:
-                        kh_put_{{dtype}}(self.table, val, &ret)
-                        if needs_resize(ud):
-                            with gil:
-                                uniques.resize()
-                        append_data_{{dtype}}(ud, val)
-                elif not seen_na:
-                    seen_na = 1
-                    if needs_resize(ud):
-                        with gil:
-                            uniques.resize()
-                    append_data_{{dtype}}(ud, NAN)
-                {{else}}
-                k = kh_get_{{dtype}}(self.table, val)
-                if k == self.table.n_buckets:
-                    kh_put_{{dtype}}(self.table, val, &ret)
-                    if needs_resize(ud):
-                        with gil:
-                            uniques.resize()
-                    append_data_{{dtype}}(ud, val)
-                {{endif}}
+        # We cannot use the memoryview version on readonly-buffers due to
+        # a limitation of Cython's typed memoryviews. Instead we can use
+        # the slightly slower Cython ndarray type directly.
+        # see https://github.com/cython/cython/issues/1605
+{{unique_template}}
 
-        return uniques.to_array()
+    @cython.boundscheck(False)
+    def unique_memview(self, {{dtype}}_t[:] values):
+{{unique_template}}
 
 {{endfor}}
 
diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py
index c27af7a5bf8e4..a70dbb4ac9de5 100644
--- a/pandas/tests/reshape/test_tile.py
+++ b/pandas/tests/reshape/test_tile.py
@@ -512,6 +512,20 @@ def f():
         tm.assert_numpy_array_equal(
             mask, np.array([False, True, True, True, True]))
 
+    @pytest.mark.parametrize("array_1_writeable,array_2_writeable",[
+        (True, True), (True, False), (False, False)])
+    def test_cut_read_only(self, array_1_writeable, array_2_writeable):
+        # issue 18773
+        array_1 = np.arange(0, 100, 10)
+        array_1.flags.writeable = array_1_writeable
+
+        array_2 = np.arange(0, 100, 10)
+        array_2.flags.writeable = array_2_writeable
+
+        hundred_elements = np.arange(100)
+
+        tm.assert_categorical_equal(cut(hundred_elements, array_1),
+                                    cut(hundred_elements, array_2))
 
 def curpath():
     pth, _ = os.path.split(os.path.abspath(__file__))

From ddb5247d73345af80f5723d6f3865409f12b3877 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Sat, 23 Dec 2017 15:56:03 -0500
Subject: [PATCH 2/2] lint

---
 pandas/tests/reshape/test_tile.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py
index a70dbb4ac9de5..48f25112b45cf 100644
--- a/pandas/tests/reshape/test_tile.py
+++ b/pandas/tests/reshape/test_tile.py
@@ -512,8 +512,9 @@ def f():
         tm.assert_numpy_array_equal(
             mask, np.array([False, True, True, True, True]))
 
-    @pytest.mark.parametrize("array_1_writeable,array_2_writeable",[
-        (True, True), (True, False), (False, False)])
+    @pytest.mark.parametrize(
+        "array_1_writeable, array_2_writeable",
+        [(True, True), (True, False), (False, False)])
     def test_cut_read_only(self, array_1_writeable, array_2_writeable):
         # issue 18773
         array_1 = np.arange(0, 100, 10)
@@ -526,7 +527,3 @@ def test_cut_read_only(self, array_1_writeable, array_2_writeable):
 
         tm.assert_categorical_equal(cut(hundred_elements, array_1),
                                     cut(hundred_elements, array_2))
-
-def curpath():
-    pth, _ = os.path.split(os.path.abspath(__file__))
-    return pth