diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 9a023ce78bb6f..a59b38c40b03c 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -508,3 +508,5 @@ Bug Fixes - Removed ``millisecond`` property of ``DatetimeIndex``. This would always raise a ``ValueError`` (:issue:`12019`). + +- Bug in Series constructor with read-only data (:issue:`11502`) diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index d137ce732e005..cf42279c89508 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -88,13 +88,7 @@ """ -take_1d_template = """ -@cython.wraparound(False) -@cython.boundscheck(False) -def take_1d_%(name)s_%(dest)s(%(c_type_in)s[:] values, - int64_t[:] indexer, - %(c_type_out)s[:] out, - fill_value=np.nan): +inner_take_1d_template = """\ cdef: Py_ssize_t i, n, idx %(c_type_out)s fv @@ -112,6 +106,33 @@ def take_1d_%(name)s_%(dest)s(%(c_type_in)s[:] values, %(tab)s out[i] = %(preval)svalues[idx]%(postval)s """ +take_1d_template = """\ +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_%(name)s_%(dest)s_memview(%(c_type_in)s[:] values, + int64_t[:] indexer, + %(c_type_out)s[:] out, + fill_value=np.nan): +""" + inner_take_1d_template + """ + +@cython.wraparound(False) +@cython.boundscheck(False) +def take_1d_%(name)s_%(dest)s(ndarray[%(c_type_in)s, ndim=1] values, + int64_t[:] indexer, + %(c_type_out)s[:] out, + fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_%(name)s_%(dest)s_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. +""" + inner_take_1d_template + inner_take_2d_axis0_template = """\ cdef: Py_ssize_t i, j, k, n, idx diff --git a/pandas/src/generated.pyx b/pandas/src/generated.pyx index 738f695a6ce9f..99031da48dd20 100644 --- a/pandas/src/generated.pyx +++ b/pandas/src/generated.pyx @@ -2403,13 +2403,45 @@ def arrmap_bool(ndarray[uint8_t] index, object func): return maybe_convert_objects(result) +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_bool_bool_memview(uint8_t[:] values, + int64_t[:] indexer, + uint8_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + uint8_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_bool_bool(uint8_t[:] values, +def take_1d_bool_bool(ndarray[uint8_t, ndim=1] values, int64_t[:] indexer, uint8_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_bool_bool_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx uint8_t fv @@ -2426,13 +2458,45 @@ def take_1d_bool_bool(uint8_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_bool_object_memview(uint8_t[:] values, + int64_t[:] indexer, + object[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + object fv + + n = indexer.shape[0] + + fv = fill_value + + + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = True if values[idx] > 0 else False + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_bool_object(uint8_t[:] values, +def take_1d_bool_object(ndarray[uint8_t, ndim=1] values, int64_t[:] indexer, object[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_bool_object_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx object fv @@ -2449,13 +2513,45 @@ def take_1d_bool_object(uint8_t[:] values, else: out[i] = True if values[idx] > 0 else False +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_int8_int8_memview(int8_t[:] values, + int64_t[:] indexer, + int8_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + int8_t fv + + n = indexer.shape[0] + + fv = fill_value + + + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_int8_int8(int8_t[:] values, +def take_1d_int8_int8(ndarray[int8_t, ndim=1] values, int64_t[:] indexer, int8_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_int8_int8_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx int8_t fv @@ -2472,13 +2568,45 @@ def take_1d_int8_int8(int8_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_int8_int32_memview(int8_t[:] values, + int64_t[:] indexer, + int32_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + int32_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_int8_int32(int8_t[:] values, +def take_1d_int8_int32(ndarray[int8_t, ndim=1] values, int64_t[:] indexer, int32_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_int8_int32_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx int32_t fv @@ -2495,13 +2623,45 @@ def take_1d_int8_int32(int8_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_int8_int64_memview(int8_t[:] values, + int64_t[:] indexer, + int64_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + int64_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_int8_int64(int8_t[:] values, +def take_1d_int8_int64(ndarray[int8_t, ndim=1] values, int64_t[:] indexer, int64_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_int8_int64_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx int64_t fv @@ -2518,13 +2678,45 @@ def take_1d_int8_int64(int8_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_int8_float64_memview(int8_t[:] values, + int64_t[:] indexer, + float64_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + float64_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_int8_float64(int8_t[:] values, +def take_1d_int8_float64(ndarray[int8_t, ndim=1] values, int64_t[:] indexer, float64_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_int8_float64_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx float64_t fv @@ -2541,13 +2733,45 @@ def take_1d_int8_float64(int8_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_int16_int16_memview(int16_t[:] values, + int64_t[:] indexer, + int16_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + int16_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_int16_int16(int16_t[:] values, +def take_1d_int16_int16(ndarray[int16_t, ndim=1] values, int64_t[:] indexer, int16_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_int16_int16_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx int16_t fv @@ -2564,13 +2788,45 @@ def take_1d_int16_int16(int16_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_int16_int32_memview(int16_t[:] values, + int64_t[:] indexer, + int32_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + int32_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_int16_int32(int16_t[:] values, +def take_1d_int16_int32(ndarray[int16_t, ndim=1] values, int64_t[:] indexer, int32_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_int16_int32_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx int32_t fv @@ -2587,13 +2843,45 @@ def take_1d_int16_int32(int16_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_int16_int64_memview(int16_t[:] values, + int64_t[:] indexer, + int64_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + int64_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_int16_int64(int16_t[:] values, +def take_1d_int16_int64(ndarray[int16_t, ndim=1] values, int64_t[:] indexer, int64_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_int16_int64_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx int64_t fv @@ -2610,13 +2898,45 @@ def take_1d_int16_int64(int16_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_int16_float64_memview(int16_t[:] values, + int64_t[:] indexer, + float64_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + float64_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_int16_float64(int16_t[:] values, +def take_1d_int16_float64(ndarray[int16_t, ndim=1] values, int64_t[:] indexer, float64_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_int16_float64_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx float64_t fv @@ -2633,13 +2953,45 @@ def take_1d_int16_float64(int16_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_int32_int32_memview(int32_t[:] values, + int64_t[:] indexer, + int32_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + int32_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_int32_int32(int32_t[:] values, +def take_1d_int32_int32(ndarray[int32_t, ndim=1] values, int64_t[:] indexer, int32_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_int32_int32_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx int32_t fv @@ -2656,13 +3008,45 @@ def take_1d_int32_int32(int32_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_int32_int64_memview(int32_t[:] values, + int64_t[:] indexer, + int64_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + int64_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_int32_int64(int32_t[:] values, +def take_1d_int32_int64(ndarray[int32_t, ndim=1] values, int64_t[:] indexer, int64_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_int32_int64_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx int64_t fv @@ -2679,13 +3063,45 @@ def take_1d_int32_int64(int32_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_int32_float64_memview(int32_t[:] values, + int64_t[:] indexer, + float64_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + float64_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_int32_float64(int32_t[:] values, +def take_1d_int32_float64(ndarray[int32_t, ndim=1] values, int64_t[:] indexer, float64_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_int32_float64_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx float64_t fv @@ -2702,13 +3118,45 @@ def take_1d_int32_float64(int32_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_int64_int64_memview(int64_t[:] values, + int64_t[:] indexer, + int64_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + int64_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_int64_int64(int64_t[:] values, +def take_1d_int64_int64(ndarray[int64_t, ndim=1] values, int64_t[:] indexer, int64_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_int64_int64_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx int64_t fv @@ -2725,13 +3173,45 @@ def take_1d_int64_int64(int64_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_int64_float64_memview(int64_t[:] values, + int64_t[:] indexer, + float64_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + float64_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_int64_float64(int64_t[:] values, +def take_1d_int64_float64(ndarray[int64_t, ndim=1] values, int64_t[:] indexer, float64_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_int64_float64_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx float64_t fv @@ -2748,13 +3228,45 @@ def take_1d_int64_float64(int64_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_float32_float32_memview(float32_t[:] values, + int64_t[:] indexer, + float32_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + float32_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_float32_float32(float32_t[:] values, +def take_1d_float32_float32(ndarray[float32_t, ndim=1] values, int64_t[:] indexer, float32_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_float32_float32_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx float32_t fv @@ -2771,13 +3283,67 @@ def take_1d_float32_float32(float32_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_float32_float64_memview(float32_t[:] values, + int64_t[:] indexer, + float64_t[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + float64_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_float32_float64(float32_t[:] values, +def take_1d_float32_float64(ndarray[float32_t, ndim=1] values, int64_t[:] indexer, float64_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_float32_float64_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. + cdef: + Py_ssize_t i, n, idx + float64_t fv + + n = indexer.shape[0] + + fv = fill_value + + with nogil: + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_float64_float64_memview(float64_t[:] values, + int64_t[:] indexer, + float64_t[:] out, + fill_value=np.nan): cdef: Py_ssize_t i, n, idx float64_t fv @@ -2797,10 +3363,20 @@ def take_1d_float32_float64(float32_t[:] values, @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_float64_float64(float64_t[:] values, +def take_1d_float64_float64(ndarray[float64_t, ndim=1] values, int64_t[:] indexer, float64_t[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_float64_float64_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx float64_t fv @@ -2817,13 +3393,45 @@ def take_1d_float64_float64(float64_t[:] values, else: out[i] = values[idx] +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline take_1d_object_object_memview(object[:] values, + int64_t[:] indexer, + object[:] out, + fill_value=np.nan): + cdef: + Py_ssize_t i, n, idx + object fv + + n = indexer.shape[0] + + fv = fill_value + + + for i from 0 <= i < n: + idx = indexer[i] + if idx == -1: + out[i] = fv + else: + out[i] = values[idx] + @cython.wraparound(False) @cython.boundscheck(False) -def take_1d_object_object(object[:] values, +def take_1d_object_object(ndarray[object, ndim=1] values, int64_t[:] indexer, object[:] out, fill_value=np.nan): + + if values.flags.writeable: + # We can call the memoryview version of the code + take_1d_object_object_memview(values, indexer, out, + fill_value=fill_value) + return + + # We cannot use the memoryview version on readonly-buffers due to + # a limitation of Cython's typed memoryviews. Instead we can use + # the slightly slower Cython ndarray type directly. cdef: Py_ssize_t i, n, idx object fv diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 664e4a4e078fe..bc204740567de 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -774,8 +774,9 @@ class TestTake(tm.TestCase): _multiprocess_can_split_ = True def test_1d_with_out(self): - def _test_dtype(dtype, can_hold_na): + def _test_dtype(dtype, can_hold_na, writeable=True): data = np.random.randint(0, 2, 4).astype(dtype) + data.flags.writeable = writeable indexer = [2, 1, 0, 1] out = np.empty(4, dtype=dtype) @@ -796,18 +797,22 @@ def _test_dtype(dtype, can_hold_na): # no exception o/w data.take(indexer, out=out) - _test_dtype(np.float64, True) - _test_dtype(np.float32, True) - _test_dtype(np.uint64, False) - _test_dtype(np.uint32, False) - _test_dtype(np.uint16, False) - _test_dtype(np.uint8, False) - _test_dtype(np.int64, False) - _test_dtype(np.int32, False) - _test_dtype(np.int16, False) - _test_dtype(np.int8, False) - _test_dtype(np.object_, True) - _test_dtype(np.bool, False) + for writeable in [True, False]: + # Check that take_nd works both with writeable arrays (in which + # case fast typed memoryviews implementation) and read-only + # arrays alike. + _test_dtype(np.float64, True, writeable=writeable) + _test_dtype(np.float32, True, writeable=writeable) + _test_dtype(np.uint64, False, writeable=writeable) + _test_dtype(np.uint32, False, writeable=writeable) + _test_dtype(np.uint16, False, writeable=writeable) + _test_dtype(np.uint8, False, writeable=writeable) + _test_dtype(np.int64, False, writeable=writeable) + _test_dtype(np.int32, False, writeable=writeable) + _test_dtype(np.int16, False, writeable=writeable) + _test_dtype(np.int8, False, writeable=writeable) + _test_dtype(np.object_, True, writeable=writeable) + _test_dtype(np.bool, False, writeable=writeable) def test_1d_fill_nonna(self): def _test_dtype(dtype, fill_value, out_dtype):