diff --git a/ci/appveyor-27.yaml b/ci/appveyor-27.yaml index 10511ac0e00ca..114dcfb0c6440 100644 --- a/ci/appveyor-27.yaml +++ b/ci/appveyor-27.yaml @@ -24,7 +24,7 @@ dependencies: - xlsxwriter - xlwt # universal - - cython + - cython>=0.28.2 - pytest - pytest-xdist - moto diff --git a/ci/appveyor-36.yaml b/ci/appveyor-36.yaml index 868724419c464..63e45d0544ad9 100644 --- a/ci/appveyor-36.yaml +++ b/ci/appveyor-36.yaml @@ -22,6 +22,6 @@ dependencies: - xlsxwriter - xlwt # universal - - cython + - cython>=0.28.2 - pytest - pytest-xdist diff --git a/ci/circle-27-compat.yaml b/ci/circle-27-compat.yaml index 81a48d4edf11c..b5be569eb28a4 100644 --- a/ci/circle-27-compat.yaml +++ b/ci/circle-27-compat.yaml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - bottleneck=1.0.0 - - cython=0.24 + - cython=0.28.2 - jinja2=2.8 - numexpr=2.4.4 # we test that we correctly don't use an unsupported numexpr - numpy=1.9.2 diff --git a/ci/circle-35-ascii.yaml b/ci/circle-35-ascii.yaml index 602c414b49bb2..745678791458d 100644 --- a/ci/circle-35-ascii.yaml +++ b/ci/circle-35-ascii.yaml @@ -2,7 +2,7 @@ name: pandas channels: - defaults dependencies: - - cython + - cython>=0.28.2 - nomkl - numpy - python-dateutil diff --git a/ci/circle-36-locale.yaml b/ci/circle-36-locale.yaml index cc852c1e2aeeb..091a5a637becd 100644 --- a/ci/circle-36-locale.yaml +++ b/ci/circle-36-locale.yaml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - beautifulsoup4 - - cython + - cython>=0.28.2 - html5lib - ipython - jinja2 diff --git a/ci/circle-36-locale_slow.yaml b/ci/circle-36-locale_slow.yaml index f44e98e1ee09d..649f93f7aa427 100644 --- a/ci/circle-36-locale_slow.yaml +++ b/ci/circle-36-locale_slow.yaml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - beautifulsoup4 - - cython + - cython>=0.28.2 - gcsfs - html5lib - ipython diff --git a/ci/environment-dev.yaml b/ci/environment-dev.yaml index 5733857b55dd4..797506547b773 100644 --- a/ci/environment-dev.yaml +++ b/ci/environment-dev.yaml @@ -3,7 +3,7 @@ channels: - defaults - conda-forge dependencies: - - Cython + - Cython>=0.28.2 - NumPy - flake8 - moto diff --git a/ci/travis-27-locale.yaml b/ci/travis-27-locale.yaml index 1312c1296d46a..78cbe8f59a8e0 100644 --- a/ci/travis-27-locale.yaml +++ b/ci/travis-27-locale.yaml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - bottleneck=1.0.0 - - cython=0.24 + - cython=0.28.2 - lxml - matplotlib=1.4.3 - numpy=1.9.2 diff --git a/ci/travis-27.yaml b/ci/travis-27.yaml index 482b888b88062..9cb20734dc63d 100644 --- a/ci/travis-27.yaml +++ b/ci/travis-27.yaml @@ -5,7 +5,7 @@ channels: dependencies: - beautifulsoup4 - bottleneck - - cython=0.24 + - cython=0.28.2 - fastparquet - feather-format - flake8=3.4.1 diff --git a/ci/travis-35-osx.yaml b/ci/travis-35-osx.yaml index e74abac4c9775..fff7acc64d537 100644 --- a/ci/travis-35-osx.yaml +++ b/ci/travis-35-osx.yaml @@ -4,7 +4,7 @@ channels: dependencies: - beautifulsoup4 - bottleneck - - cython + - cython>=0.28.2 - html5lib - jinja2 - lxml diff --git a/ci/travis-36-doc.yaml b/ci/travis-36-doc.yaml index c22dddbe0ba3f..153a81197a6c7 100644 --- a/ci/travis-36-doc.yaml +++ b/ci/travis-36-doc.yaml @@ -6,7 +6,7 @@ channels: dependencies: - beautifulsoup4 - bottleneck - - cython + - cython>=0.28.2 - fastparquet - feather-format - html5lib diff --git a/ci/travis-36-numpydev.yaml b/ci/travis-36-numpydev.yaml index 455d65feb4242..038c6537622dd 100644 --- a/ci/travis-36-numpydev.yaml +++ b/ci/travis-36-numpydev.yaml @@ -4,7 +4,7 @@ channels: dependencies: - python=3.6* - pytz - - Cython + - Cython>=0.28.2 # universal - pytest - pytest-xdist diff --git a/ci/travis-36-slow.yaml b/ci/travis-36-slow.yaml index 6c475dc48723c..f6738e3837186 100644 --- a/ci/travis-36-slow.yaml +++ b/ci/travis-36-slow.yaml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - beautifulsoup4 - - cython + - cython>=0.28.2 - html5lib - lxml - matplotlib diff --git a/ci/travis-36.yaml b/ci/travis-36.yaml index ff4f1a4a86f99..7eceba76cab96 100644 --- a/ci/travis-36.yaml +++ b/ci/travis-36.yaml @@ -4,7 +4,7 @@ channels: - conda-forge dependencies: - beautifulsoup4 - - cython + - cython>=0.28.2 - dask - fastparquet - feather-format diff --git a/ci/travis-37.yaml b/ci/travis-37.yaml index 8b255c9e6ec72..1dc2930bf7287 100644 --- a/ci/travis-37.yaml +++ b/ci/travis-37.yaml @@ -5,7 +5,7 @@ channels: - c3i_test dependencies: - python=3.7 - - cython + - cython>=0.28.2 - numpy - python-dateutil - nomkl diff --git a/doc/source/install.rst b/doc/source/install.rst index a8c5194124829..087bca0d5cd3c 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -253,7 +253,7 @@ Optional Dependencies ~~~~~~~~~~~~~~~~~~~~~ * `Cython `__: Only necessary to build development - version. Version 0.24 or higher. + version. Version 0.28.2 or higher. * `SciPy `__: miscellaneous statistical functions, Version 0.14.0 or higher * `xarray `__: pandas like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended. * `PyTables `__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended. diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 7362e11b22189..5e8d8fddea392 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -410,12 +410,17 @@ Reshaping - - +Build Changes +^^^^^^^^^^^^^ + +- Building pandas for development now requires ``cython >= 0.28.2`` (:issue:`21688`) - Other ^^^^^ - :meth: `~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`) +- Require at least 0.28.2 version of ``cython`` to support read-only memoryviews (:issue:`21688`) - - - diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 4d2b6f845eb71..ff6570e2106b2 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -120,7 +120,7 @@ cdef class {{name}}Vector: append_data_{{dtype}}(self.data, x) - cdef extend(self, {{arg}}[:] x): + cdef extend(self, const {{arg}}[:] x): for i in range(len(x)): self.append(x[i]) @@ -253,56 +253,10 @@ dtypes = [('Float64', 'float64', True, 'nan'), ('UInt64', 'uint64', False, 0), ('Int64', 'int64', False, 'iNaT')] -def get_dispatch(dtypes): - for (name, dtype, float_group, default_na_value) in dtypes: - unique_template = """\ - cdef: - Py_ssize_t i, n = len(values) - int ret = 0 - {dtype}_t val - khiter_t k - bint seen_na = 0 - {name}Vector uniques = {name}Vector() - {name}VectorData *ud - - ud = uniques.data - - with nogil: - for i in range(n): - val = values[i] - IF {float_group}: - if val == val: - k = kh_get_{dtype}(self.table, val) - if k == self.table.n_buckets: - kh_put_{dtype}(self.table, val, &ret) - if needs_resize(ud): - with gil: - uniques.resize() - append_data_{dtype}(ud, val) - elif not seen_na: - seen_na = 1 - if needs_resize(ud): - with gil: - uniques.resize() - append_data_{dtype}(ud, NAN) - ELSE: - k = kh_get_{dtype}(self.table, val) - if k == self.table.n_buckets: - kh_put_{dtype}(self.table, val, &ret) - if needs_resize(ud): - with gil: - uniques.resize() - append_data_{dtype}(ud, val) - return uniques.to_array() - """ - - unique_template = unique_template.format(name=name, dtype=dtype, float_group=float_group) - - yield (name, dtype, float_group, default_na_value, unique_template) }} -{{for name, dtype, float_group, default_na_value, unique_template in get_dispatch(dtypes)}} +{{for name, dtype, float_group, default_na_value in dtypes}} cdef class {{name}}HashTable(HashTable): @@ -351,7 +305,7 @@ cdef class {{name}}HashTable(HashTable): raise KeyError(key) @cython.boundscheck(False) - def map(self, {{dtype}}_t[:] keys, int64_t[:] values): + def map(self, const {{dtype}}_t[:] keys, const int64_t[:] values): cdef: Py_ssize_t i, n = len(values) int ret = 0 @@ -379,7 +333,7 @@ cdef class {{name}}HashTable(HashTable): self.table.vals[k] = i @cython.boundscheck(False) - def lookup(self, {{dtype}}_t[:] values): + def lookup(self, const {{dtype}}_t[:] values): cdef: Py_ssize_t i, n = len(values) int ret = 0 @@ -404,7 +358,7 @@ cdef class {{name}}HashTable(HashTable): return uniques.to_array(), labels @cython.boundscheck(False) - def get_labels(self, {{dtype}}_t[:] values, {{name}}Vector uniques, + def get_labels(self, const {{dtype}}_t[:] values, {{name}}Vector uniques, Py_ssize_t count_prior, Py_ssize_t na_sentinel, object na_value=None): cdef: @@ -461,7 +415,7 @@ cdef class {{name}}HashTable(HashTable): return np.asarray(labels) @cython.boundscheck(False) - def get_labels_groupby(self, {{dtype}}_t[:] values): + def get_labels_groupby(self, const {{dtype}}_t[:] values): cdef: Py_ssize_t i, n = len(values) int64_t[:] labels @@ -506,20 +460,46 @@ cdef class {{name}}HashTable(HashTable): return np.asarray(labels), arr_uniques @cython.boundscheck(False) - def unique(self, ndarray[{{dtype}}_t, ndim=1] values): - if values.flags.writeable: - # If the value is writeable (mutable) then use memview - return self.unique_memview(values) + def unique(self, const {{dtype}}_t[:] values): + cdef: + Py_ssize_t i, n = len(values) + int ret = 0 + {{dtype}}_t val + khiter_t k + bint seen_na = 0 + {{name}}Vector uniques = {{name}}Vector() + {{name}}VectorData *ud - # We cannot use the memoryview version on readonly-buffers due to - # a limitation of Cython's typed memoryviews. Instead we can use - # the slightly slower Cython ndarray type directly. - # see https://github.com/cython/cython/issues/1605 -{{unique_template}} + ud = uniques.data - @cython.boundscheck(False) - def unique_memview(self, {{dtype}}_t[:] values): -{{unique_template}} + with nogil: + for i in range(n): + val = values[i] + {{if float_group}} + if val == val: + k = kh_get_{{dtype}}(self.table, val) + if k == self.table.n_buckets: + kh_put_{{dtype}}(self.table, val, &ret) + if needs_resize(ud): + with gil: + uniques.resize() + append_data_{{dtype}}(ud, val) + elif not seen_na: + seen_na = 1 + if needs_resize(ud): + with gil: + uniques.resize() + append_data_{{dtype}}(ud, NAN) + {{else}} + k = kh_get_{{dtype}}(self.table, val) + if k == self.table.n_buckets: + kh_put_{{dtype}}(self.table, val, &ret) + if needs_resize(ud): + with gil: + uniques.resize() + append_data_{{dtype}}(ud, val) + {{endif}} + return uniques.to_array() {{endfor}} diff --git a/pandas/conftest.py b/pandas/conftest.py index 255e0e165041b..84f1d8f29f1f1 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -138,6 +138,14 @@ def compression_only(request): return request.param +@pytest.fixture(params=[True, False]) +def writable(request): + """ + Fixture that an array is writable + """ + return request.param + + @pytest.fixture(scope='module') def datetime_tz_utc(): from datetime import timezone diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 46bd879c2db87..de3c9574a4471 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1077,15 +1077,19 @@ class TestGroupVarFloat32(GroupVarTestMixin): class TestHashTable(object): - def test_lookup_nan(self): + def test_lookup_nan(self, writable): xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3]) + # GH 21688 ensure we can deal with readonly memory views + xs.setflags(write=writable) m = ht.Float64HashTable() m.map_locations(xs) tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), dtype=np.int64)) - def test_lookup_overflow(self): + def test_lookup_overflow(self, writable): xs = np.array([1, 2, 2**63], dtype=np.uint64) + # GH 21688 ensure we can deal with readonly memory views + xs.setflags(write=writable) m = ht.UInt64HashTable() m.map_locations(xs) tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs), @@ -1096,12 +1100,14 @@ def test_get_unique(self): exp = np.array([1, 2, 2**63], dtype=np.uint64) tm.assert_numpy_array_equal(s.unique(), exp) - def test_vector_resize(self): + def test_vector_resize(self, writable): # Test for memory errors after internal vector # reallocations (pull request #7157) def _test_vector_resize(htable, uniques, dtype, nvals, safely_resizes): vals = np.array(np.random.randn(1000), dtype=dtype) + # GH 21688 ensure we can deal with readonly memory views + vals.setflags(write=writable) # get_labels may append to uniques htable.get_labels(vals[:nvals], uniques, 0, -1) # to_array() set an external_view_exists flag on uniques. diff --git a/setup.py b/setup.py index 29b19fed0e2d4..8018d71b74655 100755 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ def is_platform_mac(): return sys.platform == 'darwin' -min_cython_ver = '0.24' +min_cython_ver = '0.28.2' try: import Cython ver = Cython.__version__