From 3fb0d0a73dd4785582f9f90647c34b57083b451a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 17 Jan 2018 23:29:32 -0800 Subject: [PATCH 1/4] implement libinternals --- pandas/_libs/internals.pyx | 449 +++++++++++++++++++++++++++++++++++++ pandas/_libs/lib.pyx | 422 ---------------------------------- pandas/core/internals.py | 12 +- setup.py | 3 + 4 files changed, 459 insertions(+), 427 deletions(-) create mode 100644 pandas/_libs/internals.pyx diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx new file mode 100644 index 0000000000000..7f0e91707b69f --- /dev/null +++ b/pandas/_libs/internals.pyx @@ -0,0 +1,449 @@ +# -*- coding: utf-8 -*- + +cimport cython +from cython cimport Py_ssize_t + +from cpython cimport PyObject + +import numpy as np +cimport numpy as np +from numpy cimport int64_t + +cdef extern from "Python.h": + Py_ssize_t PY_SSIZE_T_MAX + +cdef extern from "compat_helper.h": + cdef int slice_get_indices(PyObject* s, Py_ssize_t length, + Py_ssize_t *start, Py_ssize_t *stop, + Py_ssize_t *step, + Py_ssize_t *slicelength) except -1 + + +cdef class BlockPlacement: + # __slots__ = '_as_slice', '_as_array', '_len' + cdef: + slice _as_slice + object _as_array + bint _has_slice, _has_array, _is_known_slice_like + + def __init__(self, val): + cdef: + slice slc + + self._has_slice = False + self._has_array = False + + if isinstance(val, slice): + slc = slice_canonize(val) + + if slc.start != slc.stop: + self._as_slice = slc + self._has_slice = True + else: + arr = np.empty(0, dtype=np.int64) + self._as_array = arr + self._has_array = True + else: + # Cython memoryview interface requires ndarray to be writeable. + arr = np.require(val, dtype=np.int64, requirements='W') + assert arr.ndim == 1 + self._as_array = arr + self._has_array = True + + def __str__(self): + cdef: + slice s = self._ensure_has_slice() + if s is not None: + v = self._as_slice + else: + v = self._as_array + + return '%s(%r)' % (self.__class__.__name__, v) + + __repr__ = __str__ + + def __len__(self): + cdef: + slice s = self._ensure_has_slice() + if s is not None: + return slice_len(s) + else: + return len(self._as_array) + + def __iter__(self): + cdef: + slice s = self._ensure_has_slice() + Py_ssize_t start, stop, step, _ + if s is not None: + start, stop, step, _ = slice_get_indices_ex(s) + return iter(range(start, stop, step)) + else: + return iter(self._as_array) + + @property + def as_slice(self): + cdef: + slice s = self._ensure_has_slice() + if s is None: + raise TypeError('Not slice-like') + else: + return s + + @property + def indexer(self): + cdef: + slice s = self._ensure_has_slice() + if s is not None: + return s + else: + return self._as_array + + def isin(self, arr): + from pandas.core.index import Int64Index + return Int64Index(self.as_array, copy=False).isin(arr) + + @property + def as_array(self): + cdef: + Py_ssize_t start, stop, end, _ + if not self._has_array: + start, stop, step, _ = slice_get_indices_ex(self._as_slice) + self._as_array = np.arange(start, stop, step, + dtype=np.int64) + self._has_array = True + return self._as_array + + @property + def is_slice_like(self): + cdef: + slice s = self._ensure_has_slice() + return s is not None + + def __getitem__(self, loc): + cdef: + slice s = self._ensure_has_slice() + if s is not None: + val = slice_getitem(s, loc) + else: + val = self._as_array[loc] + + if not isinstance(val, slice) and val.ndim == 0: + return val + + return BlockPlacement(val) + + def delete(self, loc): + return BlockPlacement(np.delete(self.as_array, loc, axis=0)) + + def append(self, others): + if len(others) == 0: + return self + + return BlockPlacement(np.concatenate([self.as_array] + + [o.as_array for o in others])) + + cdef iadd(self, other): + cdef: + slice s = self._ensure_has_slice() + Py_ssize_t other_int, start, stop, step, l + + if isinstance(other, int) and s is not None: + other_int = other + + if other_int == 0: + return self + + start, stop, step, l = slice_get_indices_ex(s) + start += other_int + stop += other_int + + if ((step > 0 and start < 0) or + (step < 0 and stop < step)): + raise ValueError("iadd causes length change") + + if stop < 0: + self._as_slice = slice(start, None, step) + else: + self._as_slice = slice(start, stop, step) + + self._has_array = False + self._as_array = None + else: + newarr = self.as_array + other + if (newarr < 0).any(): + raise ValueError("iadd causes length change") + + self._as_array = newarr + self._has_array = True + self._has_slice = False + self._as_slice = None + + return self + + cdef BlockPlacement copy(self): + cdef: + slice s = self._ensure_has_slice() + if s is not None: + return BlockPlacement(s) + else: + return BlockPlacement(self._as_array) + + def add(self, other): + return self.copy().iadd(other) + + def sub(self, other): + return self.add(-other) + + cdef slice _ensure_has_slice(self): + if not self._has_slice: + self._as_slice = indexer_as_slice(self._as_array) + self._has_slice = True + return self._as_slice + + +cdef slice_canonize(slice s): + """ + Convert slice to canonical bounded form. + """ + cdef: + Py_ssize_t start = 0, stop = 0, step = 1, length + + if s.step is None: + step = 1 + else: + step = s.step + if step == 0: + raise ValueError("slice step cannot be zero") + + if step > 0: + if s.stop is None: + raise ValueError("unbounded slice") + + stop = s.stop + if s.start is None: + start = 0 + else: + start = s.start + if start > stop: + start = stop + elif step < 0: + if s.start is None: + raise ValueError("unbounded slice") + + start = s.start + if s.stop is None: + stop = -1 + else: + stop = s.stop + if stop > start: + stop = start + + if start < 0 or (stop < 0 and s.stop is not None): + raise ValueError("unbounded slice") + + if stop < 0: + return slice(start, None, step) + else: + return slice(start, stop, step) + + +cpdef Py_ssize_t slice_len(slice slc, + Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: + """ + Get length of a bounded slice. + + The slice must not have any "open" bounds that would create dependency on + container size, i.e.: + - if ``s.step is None or s.step > 0``, ``s.stop`` is not ``None`` + - if ``s.step < 0``, ``s.start`` is not ``None`` + + Otherwise, the result is unreliable. + + """ + cdef: + Py_ssize_t start, stop, step, length + + if slc is None: + raise TypeError("slc must be slice") + + slice_get_indices(slc, objlen, + &start, &stop, &step, &length) + + return length + + +cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): + """ + Get (start, stop, step, length) tuple for a slice. + + If `objlen` is not specified, slice must be bounded, otherwise the result + will be wrong. + + """ + cdef: + Py_ssize_t start, stop, step, length + + if slc is None: + raise TypeError("slc should be a slice") + + slice_get_indices(slc, objlen, + &start, &stop, &step, &length) + + return start, stop, step, length + + +cdef slice_getitem(slice slc, ind): + cdef: + Py_ssize_t s_start, s_stop, s_step, s_len + Py_ssize_t ind_start, ind_stop, ind_step, ind_len + + s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc) + + if isinstance(ind, slice): + ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, + s_len) + + if ind_step > 0 and ind_len == s_len: + # short-cut for no-op slice + if ind_len == s_len: + return slc + + if ind_step < 0: + s_start = s_stop - s_step + ind_step = -ind_step + + s_step *= ind_step + s_stop = s_start + ind_stop * s_step + s_start = s_start + ind_start * s_step + + if s_step < 0 and s_stop < 0: + return slice(s_start, None, s_step) + else: + return slice(s_start, s_stop, s_step) + + else: + return np.arange(s_start, s_stop, s_step, dtype=np.int64)[ind] + + +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef slice indexer_as_slice(int64_t[:] vals): + cdef: + Py_ssize_t i, n, start, stop + int64_t d + + if vals is None: + raise TypeError("vals must be ndarray") + + n = vals.shape[0] + + if n == 0 or vals[0] < 0: + return None + + if n == 1: + return slice(vals[0], vals[0] + 1, 1) + + if vals[1] < 0: + return None + + # n > 2 + d = vals[1] - vals[0] + + if d == 0: + return None + + for i in range(2, n): + if vals[i] < 0 or vals[i] - vals[i - 1] != d: + return None + + start = vals[0] + stop = start + n * d + if stop < 0 and d < 0: + return slice(start, None, d) + else: + return slice(start, stop, d) + + +@cython.boundscheck(False) +@cython.wraparound(False) +def get_blkno_indexers(int64_t[:] blknos, bint group=True): + """ + Enumerate contiguous runs of integers in ndarray. + + Iterate over elements of `blknos` yielding ``(blkno, slice(start, stop))`` + pairs for each contiguous run found. + + If `group` is True and there is more than one run for a certain blkno, + ``(blkno, array)`` with an array containing positions of all elements equal + to blkno. + + Returns + ------- + iter : iterator of (int, slice or array) + + """ + # There's blkno in this function's name because it's used in block & + # blockno handling. + cdef: + int64_t cur_blkno + Py_ssize_t i, start, stop, n, diff + + object blkno + list group_order + dict group_slices + int64_t[:] res_view + + n = blknos.shape[0] + + if n == 0: + return + + start = 0 + cur_blkno = blknos[start] + + if group is False: + for i in range(1, n): + if blknos[i] != cur_blkno: + yield cur_blkno, slice(start, i) + + start = i + cur_blkno = blknos[i] + + yield cur_blkno, slice(start, n) + else: + group_order = [] + group_dict = {} + + for i in range(1, n): + if blknos[i] != cur_blkno: + if cur_blkno not in group_dict: + group_order.append(cur_blkno) + group_dict[cur_blkno] = [(start, i)] + else: + group_dict[cur_blkno].append((start, i)) + + start = i + cur_blkno = blknos[i] + + if cur_blkno not in group_dict: + group_order.append(cur_blkno) + group_dict[cur_blkno] = [(start, n)] + else: + group_dict[cur_blkno].append((start, n)) + + for blkno in group_order: + slices = group_dict[blkno] + if len(slices) == 1: + yield blkno, slice(slices[0][0], slices[0][1]) + else: + tot_len = sum(stop - start for start, stop in slices) + result = np.empty(tot_len, dtype=np.int64) + res_view = result + + i = 0 + for start, stop in slices: + for diff in range(start, stop): + res_view[i] = diff + i += 1 + + yield blkno, result diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f6c70027ae6f1..4c967f45bb967 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -35,9 +35,6 @@ try: except ImportError: from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE -cdef extern from "Python.h": - Py_ssize_t PY_SSIZE_T_MAX - cdef extern from "compat_helper.h": cdef int slice_get_indices( @@ -1161,424 +1158,5 @@ def indices_fast(object index, ndarray[int64_t] labels, list keys, return result -@cython.boundscheck(False) -@cython.wraparound(False) -def get_blkno_indexers(int64_t[:] blknos, bint group=True): - """ - Enumerate contiguous runs of integers in ndarray. - - Iterate over elements of `blknos` yielding ``(blkno, slice(start, stop))`` - pairs for each contiguous run found. - - If `group` is True and there is more than one run for a certain blkno, - ``(blkno, array)`` with an array containing positions of all elements equal - to blkno. - - Returns - ------- - iter : iterator of (int, slice or array) - - """ - # There's blkno in this function's name because it's used in block & - # blockno handling. - cdef: - int64_t cur_blkno - Py_ssize_t i, start, stop, n, diff - - object blkno - list group_order - dict group_slices - int64_t[:] res_view - - n = blknos.shape[0] - - if n == 0: - return - - start = 0 - cur_blkno = blknos[start] - - if group == False: - for i in range(1, n): - if blknos[i] != cur_blkno: - yield cur_blkno, slice(start, i) - - start = i - cur_blkno = blknos[i] - - yield cur_blkno, slice(start, n) - else: - group_order = [] - group_dict = {} - - for i in range(1, n): - if blknos[i] != cur_blkno: - if cur_blkno not in group_dict: - group_order.append(cur_blkno) - group_dict[cur_blkno] = [(start, i)] - else: - group_dict[cur_blkno].append((start, i)) - - start = i - cur_blkno = blknos[i] - - if cur_blkno not in group_dict: - group_order.append(cur_blkno) - group_dict[cur_blkno] = [(start, n)] - else: - group_dict[cur_blkno].append((start, n)) - - for blkno in group_order: - slices = group_dict[blkno] - if len(slices) == 1: - yield blkno, slice(slices[0][0], slices[0][1]) - else: - tot_len = sum(stop - start for start, stop in slices) - result = np.empty(tot_len, dtype=np.int64) - res_view = result - - i = 0 - for start, stop in slices: - for diff in range(start, stop): - res_view[i] = diff - i += 1 - - yield blkno, result - - -@cython.boundscheck(False) -@cython.wraparound(False) -cpdef slice indexer_as_slice(int64_t[:] vals): - cdef: - Py_ssize_t i, n, start, stop - int64_t d - - if vals is None: - raise TypeError("vals must be ndarray") - - n = vals.shape[0] - - if n == 0 or vals[0] < 0: - return None - - if n == 1: - return slice(vals[0], vals[0] + 1, 1) - - if vals[1] < 0: - return None - - # n > 2 - d = vals[1] - vals[0] - - if d == 0: - return None - - for i in range(2, n): - if vals[i] < 0 or vals[i] - vals[i - 1] != d: - return None - - start = vals[0] - stop = start + n * d - if stop < 0 and d < 0: - return slice(start, None, d) - else: - return slice(start, stop, d) - - -cpdef slice_canonize(slice s): - """ - Convert slice to canonical bounded form. - """ - cdef: - Py_ssize_t start = 0, stop = 0, step = 1, length - - if s.step is None: - step = 1 - else: - step = s.step - if step == 0: - raise ValueError("slice step cannot be zero") - - if step > 0: - if s.stop is None: - raise ValueError("unbounded slice") - - stop = s.stop - if s.start is None: - start = 0 - else: - start = s.start - if start > stop: - start = stop - elif step < 0: - if s.start is None: - raise ValueError("unbounded slice") - - start = s.start - if s.stop is None: - stop = -1 - else: - stop = s.stop - if stop > start: - stop = start - - if start < 0 or (stop < 0 and s.stop is not None): - raise ValueError("unbounded slice") - - if stop < 0: - return slice(start, None, step) - else: - return slice(start, stop, step) - - -cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): - """ - Get (start, stop, step, length) tuple for a slice. - - If `objlen` is not specified, slice must be bounded, otherwise the result - will be wrong. - - """ - cdef: - Py_ssize_t start, stop, step, length - - if slc is None: - raise TypeError("slc should be a slice") - - slice_get_indices(slc, objlen, - &start, &stop, &step, &length) - - return start, stop, step, length - - -cpdef Py_ssize_t slice_len( - slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: - """ - Get length of a bounded slice. - - The slice must not have any "open" bounds that would create dependency on - container size, i.e.: - - if ``s.step is None or s.step > 0``, ``s.stop`` is not ``None`` - - if ``s.step < 0``, ``s.start`` is not ``None`` - - Otherwise, the result is unreliable. - - """ - cdef: - Py_ssize_t start, stop, step, length - - if slc is None: - raise TypeError("slc must be slice") - - slice_get_indices(slc, objlen, - &start, &stop, &step, &length) - - return length - - -def slice_getitem(slice slc not None, ind): - cdef: - Py_ssize_t s_start, s_stop, s_step, s_len - Py_ssize_t ind_start, ind_stop, ind_step, ind_len - - s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc) - - if isinstance(ind, slice): - ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, - s_len) - - if ind_step > 0 and ind_len == s_len: - # short-cut for no-op slice - if ind_len == s_len: - return slc - - if ind_step < 0: - s_start = s_stop - s_step - ind_step = -ind_step - - s_step *= ind_step - s_stop = s_start + ind_stop * s_step - s_start = s_start + ind_start * s_step - - if s_step < 0 and s_stop < 0: - return slice(s_start, None, s_step) - else: - return slice(s_start, s_stop, s_step) - - else: - return np.arange(s_start, s_stop, s_step, dtype=np.int64)[ind] - - -cdef class BlockPlacement: - # __slots__ = '_as_slice', '_as_array', '_len' - cdef slice _as_slice - cdef object _as_array - - cdef bint _has_slice, _has_array, _is_known_slice_like - - def __init__(self, val): - cdef slice slc - - self._has_slice = False - self._has_array = False - - if isinstance(val, slice): - slc = slice_canonize(val) - - if slc.start != slc.stop: - self._as_slice = slc - self._has_slice = True - else: - arr = np.empty(0, dtype=np.int64) - self._as_array = arr - self._has_array = True - else: - # Cython memoryview interface requires ndarray to be writeable. - arr = np.require(val, dtype=np.int64, requirements='W') - assert arr.ndim == 1 - self._as_array = arr - self._has_array = True - - def __str__(self): - cdef slice s = self._ensure_has_slice() - if s is not None: - v = self._as_slice - else: - v = self._as_array - - return '%s(%r)' % (self.__class__.__name__, v) - - __repr__ = __str__ - - def __len__(self): - cdef slice s = self._ensure_has_slice() - if s is not None: - return slice_len(s) - else: - return len(self._as_array) - - def __iter__(self): - cdef slice s = self._ensure_has_slice() - cdef Py_ssize_t start, stop, step, _ - if s is not None: - start, stop, step, _ = slice_get_indices_ex(s) - return iter(range(start, stop, step)) - else: - return iter(self._as_array) - - @property - def as_slice(self): - cdef slice s = self._ensure_has_slice() - if s is None: - raise TypeError('Not slice-like') - else: - return s - - @property - def indexer(self): - cdef slice s = self._ensure_has_slice() - if s is not None: - return s - else: - return self._as_array - - def isin(self, arr): - from pandas.core.index import Int64Index - return Int64Index(self.as_array, copy=False).isin(arr) - - @property - def as_array(self): - cdef Py_ssize_t start, stop, end, _ - if not self._has_array: - start, stop, step, _ = slice_get_indices_ex(self._as_slice) - self._as_array = np.arange(start, stop, step, - dtype=np.int64) - self._has_array = True - return self._as_array - - @property - def is_slice_like(self): - cdef slice s = self._ensure_has_slice() - return s is not None - - def __getitem__(self, loc): - cdef slice s = self._ensure_has_slice() - if s is not None: - val = slice_getitem(s, loc) - else: - val = self._as_array[loc] - - if not isinstance(val, slice) and val.ndim == 0: - return val - - return BlockPlacement(val) - - def delete(self, loc): - return BlockPlacement(np.delete(self.as_array, loc, axis=0)) - - def append(self, others): - if len(others) == 0: - return self - - return BlockPlacement(np.concatenate([self.as_array] + - [o.as_array for o in others])) - - cdef iadd(self, other): - cdef slice s = self._ensure_has_slice() - cdef Py_ssize_t other_int, start, stop, step, l - - if isinstance(other, int) and s is not None: - other_int = other - - if other_int == 0: - return self - - start, stop, step, l = slice_get_indices_ex(s) - start += other_int - stop += other_int - - if ((step > 0 and start < 0) or - (step < 0 and stop < step)): - raise ValueError("iadd causes length change") - - if stop < 0: - self._as_slice = slice(start, None, step) - else: - self._as_slice = slice(start, stop, step) - - self._has_array = False - self._as_array = None - else: - newarr = self.as_array + other - if (newarr < 0).any(): - raise ValueError("iadd causes length change") - - self._as_array = newarr - self._has_array = True - self._has_slice = False - self._as_slice = None - - return self - - cdef BlockPlacement copy(self): - cdef slice s = self._ensure_has_slice() - if s is not None: - return BlockPlacement(s) - else: - return BlockPlacement(self._as_array) - - def add(self, other): - return self.copy().iadd(other) - - def sub(self, other): - return self.add(-other) - - cdef slice _ensure_has_slice(self): - if not self._has_slice: - self._as_slice = indexer_as_slice(self._as_array) - self._has_slice = True - return self._as_slice - - include "reduce.pyx" include "inference.pyx" diff --git a/pandas/core/internals.py b/pandas/core/internals.py index bc75a110354c0..698623bd8cd7f 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -11,6 +11,8 @@ import numpy as np +from pandas._libs import internals as libinternals + from pandas.core.base import PandasObject from pandas.core.dtypes.dtypes import ( @@ -67,7 +69,7 @@ from pandas.core.sparse.array import _maybe_to_sparse, SparseArray from pandas._libs import lib, tslib from pandas._libs.tslib import Timedelta -from pandas._libs.lib import BlockPlacement +from pandas._libs.internals import BlockPlacement from pandas._libs.tslibs import conversion from pandas.util._decorators import cache_readonly @@ -1228,7 +1230,7 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): if new_mgr_locs is None: if axis == 0: - slc = lib.indexer_as_slice(indexer) + slc = libinternals.indexer_as_slice(indexer) if slc is not None: new_mgr_locs = self.mgr_locs[slc] else: @@ -5023,7 +5025,7 @@ def _get_blkno_placements(blknos, blk_count, group=True): blknos = _ensure_int64(blknos) # FIXME: blk_count is unused, but it may avoid the use of dicts in cython - for blkno, indexer in lib.get_blkno_indexers(blknos, group): + for blkno, indexer in libinternals.get_blkno_indexers(blknos, group): yield blkno, BlockPlacement(indexer) @@ -5665,8 +5667,8 @@ def _fast_count_smallints(arr): def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill): if isinstance(slice_or_indexer, slice): - return 'slice', slice_or_indexer, lib.slice_len(slice_or_indexer, - length) + return ('slice', slice_or_indexer, + libinternals.slice_len(slice_or_indexer, length)) elif (isinstance(slice_or_indexer, np.ndarray) and slice_or_indexer.dtype == np.bool_): return 'mask', slice_or_indexer, slice_or_indexer.sum() diff --git a/setup.py b/setup.py index 7dbf6c84a0451..16ca0c132eaa9 100755 --- a/setup.py +++ b/setup.py @@ -302,6 +302,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/hashtable.pyx', 'pandas/_libs/tslib.pyx', 'pandas/_libs/index.pyx', + 'pandas/_libs/internals.pyx', 'pandas/_libs/algos.pyx', 'pandas/_libs/join.pyx', 'pandas/_libs/indexing.pyx', @@ -478,6 +479,8 @@ def pxd(name): 'sources': np_datetime_sources}, '_libs.indexing': { 'pyxfile': '_libs/indexing'}, + '_libs.internals': { + 'pyxfile': '_libs/internals'}, '_libs.interval': { 'pyxfile': '_libs/interval', 'pxdfiles': ['_libs/hashtable'], From 0a94ccac854552cd1bfd31129f3532257569f934 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 17 Jan 2018 23:33:29 -0800 Subject: [PATCH 2/4] Cleanup leftover cimport, use c-api funcs for isinstance --- pandas/_libs/internals.pyx | 11 ++++++----- pandas/_libs/lib.pyx | 7 ------- 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 7f0e91707b69f..4ebc4a0a266ea 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -3,7 +3,8 @@ cimport cython from cython cimport Py_ssize_t -from cpython cimport PyObject +from cpython cimport PyObject, PyInt_Check +from cpython.slice cimport PySlice_Check import numpy as np cimport numpy as np @@ -33,7 +34,7 @@ cdef class BlockPlacement: self._has_slice = False self._has_array = False - if isinstance(val, slice): + if PySlice_Check(val): slc = slice_canonize(val) if slc.start != slc.stop: @@ -127,7 +128,7 @@ cdef class BlockPlacement: else: val = self._as_array[loc] - if not isinstance(val, slice) and val.ndim == 0: + if not PySlice_Check(val) and val.ndim == 0: return val return BlockPlacement(val) @@ -147,7 +148,7 @@ cdef class BlockPlacement: slice s = self._ensure_has_slice() Py_ssize_t other_int, start, stop, step, l - if isinstance(other, int) and s is not None: + if PyInt_Check(other) and s is not None: other_int = other if other_int == 0: @@ -299,7 +300,7 @@ cdef slice_getitem(slice slc, ind): s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc) - if isinstance(ind, slice): + if PySlice_Check(ind): ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, s_len) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 4c967f45bb967..5a4feca4f236a 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -35,13 +35,6 @@ try: except ImportError: from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE -cdef extern from "compat_helper.h": - - cdef int slice_get_indices( - PyObject* s, Py_ssize_t length, - Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step, - Py_ssize_t *slicelength) except -1 - cimport cpython isnan = np.isnan From 70514ce1ba0a7a92133e834b1f2ee414892a7a33 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 18 Jan 2018 08:37:12 -0800 Subject: [PATCH 3/4] revert edits to be pure cut/paste --- pandas/_libs/internals.pyx | 72 ++++++++++-------------- pandas/tests/internals/test_internals.py | 12 ++-- 2 files changed, 35 insertions(+), 49 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 4ebc4a0a266ea..d5e6a1a1986bb 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -3,16 +3,13 @@ cimport cython from cython cimport Py_ssize_t -from cpython cimport PyObject, PyInt_Check -from cpython.slice cimport PySlice_Check +cdef extern from "Python.h": + Py_ssize_t PY_SSIZE_T_MAX import numpy as np cimport numpy as np from numpy cimport int64_t -cdef extern from "Python.h": - Py_ssize_t PY_SSIZE_T_MAX - cdef extern from "compat_helper.h": cdef int slice_get_indices(PyObject* s, Py_ssize_t length, Py_ssize_t *start, Py_ssize_t *stop, @@ -22,19 +19,18 @@ cdef extern from "compat_helper.h": cdef class BlockPlacement: # __slots__ = '_as_slice', '_as_array', '_len' - cdef: - slice _as_slice - object _as_array - bint _has_slice, _has_array, _is_known_slice_like + cdef slice _as_slice + cdef object _as_array + + cdef bint _has_slice, _has_array, _is_known_slice_like def __init__(self, val): - cdef: - slice slc + cdef slice slc self._has_slice = False self._has_array = False - if PySlice_Check(val): + if isinstance(val, slice): slc = slice_canonize(val) if slc.start != slc.stop: @@ -52,8 +48,7 @@ cdef class BlockPlacement: self._has_array = True def __str__(self): - cdef: - slice s = self._ensure_has_slice() + cdef slice s = self._ensure_has_slice() if s is not None: v = self._as_slice else: @@ -64,17 +59,15 @@ cdef class BlockPlacement: __repr__ = __str__ def __len__(self): - cdef: - slice s = self._ensure_has_slice() + cdef slice s = self._ensure_has_slice() if s is not None: return slice_len(s) else: return len(self._as_array) def __iter__(self): - cdef: - slice s = self._ensure_has_slice() - Py_ssize_t start, stop, step, _ + cdef slice s = self._ensure_has_slice() + cdef Py_ssize_t start, stop, step, _ if s is not None: start, stop, step, _ = slice_get_indices_ex(s) return iter(range(start, stop, step)) @@ -83,8 +76,7 @@ cdef class BlockPlacement: @property def as_slice(self): - cdef: - slice s = self._ensure_has_slice() + cdef slice s = self._ensure_has_slice() if s is None: raise TypeError('Not slice-like') else: @@ -92,8 +84,7 @@ cdef class BlockPlacement: @property def indexer(self): - cdef: - slice s = self._ensure_has_slice() + cdef slice s = self._ensure_has_slice() if s is not None: return s else: @@ -105,8 +96,7 @@ cdef class BlockPlacement: @property def as_array(self): - cdef: - Py_ssize_t start, stop, end, _ + cdef Py_ssize_t start, stop, end, _ if not self._has_array: start, stop, step, _ = slice_get_indices_ex(self._as_slice) self._as_array = np.arange(start, stop, step, @@ -116,19 +106,17 @@ cdef class BlockPlacement: @property def is_slice_like(self): - cdef: - slice s = self._ensure_has_slice() + cdef slice s = self._ensure_has_slice() return s is not None def __getitem__(self, loc): - cdef: - slice s = self._ensure_has_slice() + cdef slice s = self._ensure_has_slice() if s is not None: val = slice_getitem(s, loc) else: val = self._as_array[loc] - if not PySlice_Check(val) and val.ndim == 0: + if not isinstance(val, slice) and val.ndim == 0: return val return BlockPlacement(val) @@ -144,11 +132,10 @@ cdef class BlockPlacement: [o.as_array for o in others])) cdef iadd(self, other): - cdef: - slice s = self._ensure_has_slice() - Py_ssize_t other_int, start, stop, step, l + cdef slice s = self._ensure_has_slice() + cdef Py_ssize_t other_int, start, stop, step, l - if PyInt_Check(other) and s is not None: + if isinstance(other, int) and s is not None: other_int = other if other_int == 0: @@ -182,8 +169,7 @@ cdef class BlockPlacement: return self cdef BlockPlacement copy(self): - cdef: - slice s = self._ensure_has_slice() + cdef slice s = self._ensure_has_slice() if s is not None: return BlockPlacement(s) else: @@ -202,7 +188,7 @@ cdef class BlockPlacement: return self._as_slice -cdef slice_canonize(slice s): +cpdef slice_canonize(slice s): """ Convert slice to canonical bounded form. """ @@ -248,8 +234,8 @@ cdef slice_canonize(slice s): return slice(start, stop, step) -cpdef Py_ssize_t slice_len(slice slc, - Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: +cpdef Py_ssize_t slice_len( + slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: """ Get length of a bounded slice. @@ -293,14 +279,14 @@ cpdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): return start, stop, step, length -cdef slice_getitem(slice slc, ind): +def slice_getitem(slice slc not None, ind): cdef: Py_ssize_t s_start, s_stop, s_step, s_len Py_ssize_t ind_start, ind_stop, ind_step, ind_len s_start, s_stop, s_step, s_len = slice_get_indices_ex(slc) - if PySlice_Check(ind): + if isinstance(ind, slice): ind_start, ind_stop, ind_step, ind_len = slice_get_indices_ex(ind, s_len) @@ -402,7 +388,7 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True): start = 0 cur_blkno = blknos[start] - if group is False: + if group == False: for i in range(1, n): if blknos[i] != cur_blkno: yield cur_blkno, slice(start, i) @@ -447,4 +433,4 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True): res_view[i] = diff i += 1 - yield blkno, result + yield blkno, result \ No newline at end of file diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 623d2d39607c2..dcbd19954ed60 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -39,8 +39,8 @@ def mgr(): def assert_block_equal(left, right): tm.assert_numpy_array_equal(left.values, right.values) assert left.dtype == right.dtype - assert isinstance(left.mgr_locs, lib.BlockPlacement) - assert isinstance(right.mgr_locs, lib.BlockPlacement) + assert isinstance(left.mgr_locs, BlockPlacement) + assert isinstance(right.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(left.mgr_locs.as_array, right.mgr_locs.as_array) @@ -222,7 +222,7 @@ def _check(blk): _check(self.bool_block) def test_mgr_locs(self): - assert isinstance(self.fblock.mgr_locs, lib.BlockPlacement) + assert isinstance(self.fblock.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(self.fblock.mgr_locs.as_array, np.array([0, 2, 4], dtype=np.int64)) @@ -264,14 +264,14 @@ def test_insert(self): def test_delete(self): newb = self.fblock.copy() newb.delete(0) - assert isinstance(newb.mgr_locs, lib.BlockPlacement) + assert isinstance(newb.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(newb.mgr_locs.as_array, np.array([2, 4], dtype=np.int64)) assert (newb.values[0] == 1).all() newb = self.fblock.copy() newb.delete(1) - assert isinstance(newb.mgr_locs, lib.BlockPlacement) + assert isinstance(newb.mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(newb.mgr_locs.as_array, np.array([0, 4], dtype=np.int64)) assert (newb.values[1] == 2).all() @@ -679,7 +679,7 @@ def test_consolidate_ordering_issues(self, mgr): assert cons.nblocks == 4 cons = mgr.consolidate().get_numeric_data() assert cons.nblocks == 1 - assert isinstance(cons.blocks[0].mgr_locs, lib.BlockPlacement) + assert isinstance(cons.blocks[0].mgr_locs, BlockPlacement) tm.assert_numpy_array_equal(cons.blocks[0].mgr_locs.as_array, np.arange(len(cons.items), dtype=np.int64)) From f0146b20c7cd8f85a003f64c4b5ba35d028ae308 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 18 Jan 2018 13:56:48 -0800 Subject: [PATCH 4/4] fix missing cimport --- pandas/_libs/internals.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index d5e6a1a1986bb..93a45335efc9c 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -3,6 +3,8 @@ cimport cython from cython cimport Py_ssize_t +from cpython cimport PyObject + cdef extern from "Python.h": Py_ssize_t PY_SSIZE_T_MAX