Skip to content

Commit dcbf8b5

Browse files
xhochy authored and jreback committed
Accept constant memoryviews in HashTable.lookup (#21688)
1 parent 3091755 commit dcbf8b5

21 files changed

+83
-84
lines changed

ci/appveyor-27.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ dependencies:
2424
- xlsxwriter
2525
- xlwt
2626
# universal
27-
- cython
27+
- cython>=0.28.2
2828
- pytest
2929
- pytest-xdist
3030
- moto

ci/appveyor-36.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,6 @@ dependencies:
2222
- xlsxwriter
2323
- xlwt
2424
# universal
25-
- cython
25+
- cython>=0.28.2
2626
- pytest
2727
- pytest-xdist

ci/circle-27-compat.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
- conda-forge
55
dependencies:
66
- bottleneck=1.0.0
7-
- cython=0.24
7+
- cython=0.28.2
88
- jinja2=2.8
99
- numexpr=2.4.4 # we test that we correctly don't use an unsupported numexpr
1010
- numpy=1.9.2

ci/circle-35-ascii.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: pandas
22
channels:
33
- defaults
44
dependencies:
5-
- cython
5+
- cython>=0.28.2
66
- nomkl
77
- numpy
88
- python-dateutil

ci/circle-36-locale.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
- conda-forge
55
dependencies:
66
- beautifulsoup4
7-
- cython
7+
- cython>=0.28.2
88
- html5lib
99
- ipython
1010
- jinja2

ci/circle-36-locale_slow.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
- conda-forge
55
dependencies:
66
- beautifulsoup4
7-
- cython
7+
- cython>=0.28.2
88
- gcsfs
99
- html5lib
1010
- ipython

ci/environment-dev.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ channels:
33
- defaults
44
- conda-forge
55
dependencies:
6-
- Cython
6+
- Cython>=0.28.2
77
- NumPy
88
- flake8
99
- moto

ci/travis-27-locale.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
- conda-forge
55
dependencies:
66
- bottleneck=1.0.0
7-
- cython=0.24
7+
- cython=0.28.2
88
- lxml
99
- matplotlib=1.4.3
1010
- numpy=1.9.2

ci/travis-27.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ channels:
55
dependencies:
66
- beautifulsoup4
77
- bottleneck
8-
- cython=0.24
8+
- cython=0.28.2
99
- fastparquet
1010
- feather-format
1111
- flake8=3.4.1

ci/travis-35-osx.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
dependencies:
55
- beautifulsoup4
66
- bottleneck
7-
- cython
7+
- cython>=0.28.2
88
- html5lib
99
- jinja2
1010
- lxml

ci/travis-36-doc.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ channels:
66
dependencies:
77
- beautifulsoup4
88
- bottleneck
9-
- cython
9+
- cython>=0.28.2
1010
- fastparquet
1111
- feather-format
1212
- html5lib

ci/travis-36-numpydev.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
dependencies:
55
- python=3.6*
66
- pytz
7-
- Cython
7+
- Cython>=0.28.2
88
# universal
99
- pytest
1010
- pytest-xdist

ci/travis-36-slow.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
- conda-forge
55
dependencies:
66
- beautifulsoup4
7-
- cython
7+
- cython>=0.28.2
88
- html5lib
99
- lxml
1010
- matplotlib

ci/travis-36.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ channels:
44
- conda-forge
55
dependencies:
66
- beautifulsoup4
7-
- cython
7+
- cython>=0.28.2
88
- dask
99
- fastparquet
1010
- feather-format

ci/travis-37.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ channels:
55
- c3i_test
66
dependencies:
77
- python=3.7
8-
- cython
8+
- cython>=0.28.2
99
- numpy
1010
- python-dateutil
1111
- nomkl

doc/source/install.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,7 @@ Optional Dependencies
253253
~~~~~~~~~~~~~~~~~~~~~
254254

255255
* `Cython <http://www.cython.org>`__: Only necessary to build development
256-
version. Version 0.24 or higher.
256+
version. Version 0.28.2 or higher.
257257
* `SciPy <http://www.scipy.org>`__: miscellaneous statistical functions, Version 0.14.0 or higher
258258
* `xarray <http://xarray.pydata.org>`__: pandas like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended.
259259
* `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended.

doc/source/whatsnew/v0.24.0.txt

+5
Original file line numberDiff line numberDiff line change
@@ -410,12 +410,17 @@ Reshaping
410410
-
411411
-
412412

413+
Build Changes
414+
^^^^^^^^^^^^^
415+
416+
- Building pandas for development now requires ``cython >= 0.28.2`` (:issue:`21688`)
413417
-
414418

415419
Other
416420
^^^^^
417421

418422
- :meth:`~pandas.io.formats.style.Styler.background_gradient` now takes a ``text_color_threshold`` parameter to automatically lighten the text color based on the luminance of the background color. This improves readability with dark background colors without the need to limit the background colormap range. (:issue:`21258`)
423+
- Require at least version 0.28.2 of ``cython`` to support read-only memoryviews (:issue:`21688`)
419424
-
420425
-
421426
-

pandas/_libs/hashtable_class_helper.pxi.in

+44-64
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ cdef class {{name}}Vector:
120120

121121
append_data_{{dtype}}(self.data, x)
122122

123-
cdef extend(self, {{arg}}[:] x):
123+
cdef extend(self, const {{arg}}[:] x):
124124
for i in range(len(x)):
125125
self.append(x[i])
126126

@@ -253,56 +253,10 @@ dtypes = [('Float64', 'float64', True, 'nan'),
253253
('UInt64', 'uint64', False, 0),
254254
('Int64', 'int64', False, 'iNaT')]
255255

256-
def get_dispatch(dtypes):
257-
for (name, dtype, float_group, default_na_value) in dtypes:
258-
unique_template = """\
259-
cdef:
260-
Py_ssize_t i, n = len(values)
261-
int ret = 0
262-
{dtype}_t val
263-
khiter_t k
264-
bint seen_na = 0
265-
{name}Vector uniques = {name}Vector()
266-
{name}VectorData *ud
267-
268-
ud = uniques.data
269-
270-
with nogil:
271-
for i in range(n):
272-
val = values[i]
273-
IF {float_group}:
274-
if val == val:
275-
k = kh_get_{dtype}(self.table, val)
276-
if k == self.table.n_buckets:
277-
kh_put_{dtype}(self.table, val, &ret)
278-
if needs_resize(ud):
279-
with gil:
280-
uniques.resize()
281-
append_data_{dtype}(ud, val)
282-
elif not seen_na:
283-
seen_na = 1
284-
if needs_resize(ud):
285-
with gil:
286-
uniques.resize()
287-
append_data_{dtype}(ud, NAN)
288-
ELSE:
289-
k = kh_get_{dtype}(self.table, val)
290-
if k == self.table.n_buckets:
291-
kh_put_{dtype}(self.table, val, &ret)
292-
if needs_resize(ud):
293-
with gil:
294-
uniques.resize()
295-
append_data_{dtype}(ud, val)
296-
return uniques.to_array()
297-
"""
298-
299-
unique_template = unique_template.format(name=name, dtype=dtype, float_group=float_group)
300-
301-
yield (name, dtype, float_group, default_na_value, unique_template)
302256
}}
303257

304258

305-
{{for name, dtype, float_group, default_na_value, unique_template in get_dispatch(dtypes)}}
259+
{{for name, dtype, float_group, default_na_value in dtypes}}
306260

307261
cdef class {{name}}HashTable(HashTable):
308262

@@ -351,7 +305,7 @@ cdef class {{name}}HashTable(HashTable):
351305
raise KeyError(key)
352306

353307
@cython.boundscheck(False)
354-
def map(self, {{dtype}}_t[:] keys, int64_t[:] values):
308+
def map(self, const {{dtype}}_t[:] keys, const int64_t[:] values):
355309
cdef:
356310
Py_ssize_t i, n = len(values)
357311
int ret = 0
@@ -379,7 +333,7 @@ cdef class {{name}}HashTable(HashTable):
379333
self.table.vals[k] = i
380334

381335
@cython.boundscheck(False)
382-
def lookup(self, {{dtype}}_t[:] values):
336+
def lookup(self, const {{dtype}}_t[:] values):
383337
cdef:
384338
Py_ssize_t i, n = len(values)
385339
int ret = 0
@@ -404,7 +358,7 @@ cdef class {{name}}HashTable(HashTable):
404358
return uniques.to_array(), labels
405359

406360
@cython.boundscheck(False)
407-
def get_labels(self, {{dtype}}_t[:] values, {{name}}Vector uniques,
361+
def get_labels(self, const {{dtype}}_t[:] values, {{name}}Vector uniques,
408362
Py_ssize_t count_prior, Py_ssize_t na_sentinel,
409363
object na_value=None):
410364
cdef:
@@ -461,7 +415,7 @@ cdef class {{name}}HashTable(HashTable):
461415
return np.asarray(labels)
462416

463417
@cython.boundscheck(False)
464-
def get_labels_groupby(self, {{dtype}}_t[:] values):
418+
def get_labels_groupby(self, const {{dtype}}_t[:] values):
465419
cdef:
466420
Py_ssize_t i, n = len(values)
467421
int64_t[:] labels
@@ -506,20 +460,46 @@ cdef class {{name}}HashTable(HashTable):
506460
return np.asarray(labels), arr_uniques
507461

508462
@cython.boundscheck(False)
509-
def unique(self, ndarray[{{dtype}}_t, ndim=1] values):
510-
if values.flags.writeable:
511-
# If the value is writeable (mutable) then use memview
512-
return self.unique_memview(values)
463+
def unique(self, const {{dtype}}_t[:] values):
464+
cdef:
465+
Py_ssize_t i, n = len(values)
466+
int ret = 0
467+
{{dtype}}_t val
468+
khiter_t k
469+
bint seen_na = 0
470+
{{name}}Vector uniques = {{name}}Vector()
471+
{{name}}VectorData *ud
513472

514-
# We cannot use the memoryview version on readonly-buffers due to
515-
# a limitation of Cython's typed memoryviews. Instead we can use
516-
# the slightly slower Cython ndarray type directly.
517-
# see https://github.com/cython/cython/issues/1605
518-
{{unique_template}}
473+
ud = uniques.data
519474

520-
@cython.boundscheck(False)
521-
def unique_memview(self, {{dtype}}_t[:] values):
522-
{{unique_template}}
475+
with nogil:
476+
for i in range(n):
477+
val = values[i]
478+
{{if float_group}}
479+
if val == val:
480+
k = kh_get_{{dtype}}(self.table, val)
481+
if k == self.table.n_buckets:
482+
kh_put_{{dtype}}(self.table, val, &ret)
483+
if needs_resize(ud):
484+
with gil:
485+
uniques.resize()
486+
append_data_{{dtype}}(ud, val)
487+
elif not seen_na:
488+
seen_na = 1
489+
if needs_resize(ud):
490+
with gil:
491+
uniques.resize()
492+
append_data_{{dtype}}(ud, NAN)
493+
{{else}}
494+
k = kh_get_{{dtype}}(self.table, val)
495+
if k == self.table.n_buckets:
496+
kh_put_{{dtype}}(self.table, val, &ret)
497+
if needs_resize(ud):
498+
with gil:
499+
uniques.resize()
500+
append_data_{{dtype}}(ud, val)
501+
{{endif}}
502+
return uniques.to_array()
523503

524504
{{endfor}}
525505

pandas/conftest.py

+8
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,14 @@ def compression_only(request):
138138
return request.param
139139

140140

141+
@pytest.fixture(params=[True, False])
142+
def writable(request):
143+
"""
144+
Fixture for whether an array is writable
145+
"""
146+
return request.param
147+
148+
141149
@pytest.fixture(scope='module')
142150
def datetime_tz_utc():
143151
from datetime import timezone

pandas/tests/test_algos.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -1077,15 +1077,19 @@ class TestGroupVarFloat32(GroupVarTestMixin):
10771077

10781078
class TestHashTable(object):
10791079

1080-
def test_lookup_nan(self):
1080+
def test_lookup_nan(self, writable):
10811081
xs = np.array([2.718, 3.14, np.nan, -7, 5, 2, 3])
1082+
# GH 21688 ensure we can deal with readonly memory views
1083+
xs.setflags(write=writable)
10821084
m = ht.Float64HashTable()
10831085
m.map_locations(xs)
10841086
tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs),
10851087
dtype=np.int64))
10861088

1087-
def test_lookup_overflow(self):
1089+
def test_lookup_overflow(self, writable):
10881090
xs = np.array([1, 2, 2**63], dtype=np.uint64)
1091+
# GH 21688 ensure we can deal with readonly memory views
1092+
xs.setflags(write=writable)
10891093
m = ht.UInt64HashTable()
10901094
m.map_locations(xs)
10911095
tm.assert_numpy_array_equal(m.lookup(xs), np.arange(len(xs),
@@ -1096,12 +1100,14 @@ def test_get_unique(self):
10961100
exp = np.array([1, 2, 2**63], dtype=np.uint64)
10971101
tm.assert_numpy_array_equal(s.unique(), exp)
10981102

1099-
def test_vector_resize(self):
1103+
def test_vector_resize(self, writable):
11001104
# Test for memory errors after internal vector
11011105
# reallocations (pull request #7157)
11021106

11031107
def _test_vector_resize(htable, uniques, dtype, nvals, safely_resizes):
11041108
vals = np.array(np.random.randn(1000), dtype=dtype)
1109+
# GH 21688 ensure we can deal with readonly memory views
1110+
vals.setflags(write=writable)
11051111
# get_labels may append to uniques
11061112
htable.get_labels(vals[:nvals], uniques, 0, -1)
11071113
# to_array() set an external_view_exists flag on uniques.

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def is_platform_mac():
3232
return sys.platform == 'darwin'
3333

3434

35-
min_cython_ver = '0.24'
35+
min_cython_ver = '0.28.2'
3636
try:
3737
import Cython
3838
ver = Cython.__version__

0 commit comments

Comments
 (0)