From ac115a2271fb9f74e00d572be5483e6f104571e3 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 18 Mar 2020 13:13:30 +0200 Subject: [PATCH 1/9] CLN/STY: pandas/_libs/internals.pyx --- pandas/_libs/internals.pyx | 83 +++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 45 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 5545302fcbfc4..d5400f3a2f9cf 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -20,7 +20,6 @@ cdef class BlockPlacement: cdef: slice _as_slice object _as_array - bint _has_slice, _has_array, _is_known_slice_like def __init__(self, val): @@ -52,12 +51,10 @@ cdef class BlockPlacement: def __str__(self) -> str: cdef: slice s = self._ensure_has_slice() - if s is not None: - v = self._as_slice - else: - v = self._as_array - return f'{type(self).__name__}({v})' + v = self._as_slice if s is not None else self._as_array + + return f"{type(self).__name__}({v})" def __repr__(self) -> str: return str(self) @@ -65,68 +62,75 @@ cdef class BlockPlacement: def __len__(self) -> int: cdef: slice s = self._ensure_has_slice() + if s is not None: return slice_len(s) - else: - return len(self._as_array) + + return len(self._as_array) def __iter__(self): cdef: slice s = self._ensure_has_slice() Py_ssize_t start, stop, step, _ + if s is not None: start, stop, step, _ = slice_get_indices_ex(s) return iter(range(start, stop, step)) - else: - return iter(self._as_array) + + return iter(self._as_array) @property def as_slice(self) -> slice: cdef: slice s = self._ensure_has_slice() - if s is None: - raise TypeError('Not slice-like') - else: + + if s is not None: return s + raise TypeError("Not slice-like") + @property def indexer(self): cdef: slice s = self._ensure_has_slice() + if s is not None: return s - else: - return self._as_array + + return self._as_array def isin(self, arr): from pandas.core.indexes.api import Int64Index + return Int64Index(self.as_array, 
copy=False).isin(arr) @property def as_array(self): cdef: Py_ssize_t start, stop, end, _ + if not self._has_array: start, stop, step, _ = slice_get_indices_ex(self._as_slice) - # NOTE: this is the C-optimized equivalent of - # np.arange(start, stop, step, dtype=np.int64) + # NOTE: + # this is the C-optimized equivalent of + # `np.arange(start, stop, step, dtype=np.int64)` self._as_array = cnp.PyArray_Arange(start, stop, step, NPY_INT64) self._has_array = True + return self._as_array @property def is_slice_like(self) -> bool: cdef: slice s = self._ensure_has_slice() + return s is not None def __getitem__(self, loc): cdef: slice s = self._ensure_has_slice() - if s is not None: - val = slice_getitem(s, loc) - else: - val = self._as_array[loc] + + val = slice_getitem(s, loc) if s is not None else self._as_array[loc] if not isinstance(val, slice) and val.ndim == 0: return val @@ -137,11 +141,12 @@ cdef class BlockPlacement: return BlockPlacement(np.delete(self.as_array, loc, axis=0)) def append(self, others): - if len(others) == 0: + if not len(others): return self - return BlockPlacement(np.concatenate([self.as_array] + - [o.as_array for o in others])) + return BlockPlacement( + np.concatenate([self.as_array] + [o.as_array for o in others]) + ) cdef iadd(self, other): cdef: @@ -159,14 +164,10 @@ cdef class BlockPlacement: start += other_int stop += other_int - if ((step > 0 and start < 0) or - (step < 0 and stop < step)): + if (step > 0 and start < 0) or (step < 0 and stop < step): raise ValueError("iadd causes length change") - if stop < 0: - val = slice(start, None, step) - else: - val = slice(start, stop, step) + val = slice(start, None, step) if stop < 0 else slice(start, stop, step) return BlockPlacement(val) else: @@ -187,6 +188,7 @@ cdef class BlockPlacement: if not self._has_slice: self._as_slice = indexer_as_slice(self._as_array) self._has_slice = True + return self._as_slice @@ -236,8 +238,7 @@ cdef slice slice_canonize(slice s): return slice(start, stop, 
step) -cpdef Py_ssize_t slice_len( - slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: +cpdef Py_ssize_t slice_len(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: """ Get length of a bounded slice. @@ -254,8 +255,7 @@ cpdef Py_ssize_t slice_len( if slc is None: raise TypeError("slc must be slice") - PySlice_GetIndicesEx(slc, objlen, - &start, &stop, &step, &length) + PySlice_GetIndicesEx(slc, objlen, &start, &stop, &step, &length) return length @@ -273,8 +273,7 @@ cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): if slc is None: raise TypeError("slc should be a slice") - PySlice_GetIndicesEx(slc, objlen, - &start, &stop, &step, &length) + PySlice_GetIndicesEx(slc, objlen, &start, &stop, &step, &length) return start, stop, step, length @@ -370,22 +369,16 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True): # There's blkno in this function's name because it's used in block & # blockno handling. cdef: - int64_t cur_blkno - Py_ssize_t i, start, stop, n, diff - + Py_ssize_t i, start = 0, stop, n = blknos.shape[0], diff, tot_len object blkno object group_dict = defaultdict(list) int64_t[:] res_view - - n = blknos.shape[0] + int64_t cur_blkno = blknos[start] if n == 0: return - start = 0 - cur_blkno = blknos[start] - - if group is False: + if not group: for i in range(1, n): if blknos[i] != cur_blkno: yield cur_blkno, slice(start, i) From 67a592f70c11cab4766b2bdd3b5de35d5f8c3b70 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 18 Mar 2020 14:54:22 +0200 Subject: [PATCH 2/9] Split cdef into multiple lines REF: https://github.com/pandas-dev/pandas/pull/32801#discussion_r394308610 --- pandas/_libs/internals.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index d5400f3a2f9cf..61ce177bf54cd 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -369,7 +369,8 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True): 
# There's blkno in this function's name because it's used in block & # blockno handling. cdef: - Py_ssize_t i, start = 0, stop, n = blknos.shape[0], diff, tot_len + Py_ssize_t i, diff, tot_len, stop + Py_ssize_t start = 0, n = blknos.shape[0] object blkno object group_dict = defaultdict(list) int64_t[:] res_view From 8ea73ff62c80daf41b641210b5d0597d7a1b8f1c Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 18 Mar 2020 14:56:21 +0200 Subject: [PATCH 3/9] Reverted if/else statement REF: https://github.com/pandas-dev/pandas/pull/32801#discussion_r394307873 --- pandas/_libs/internals.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 61ce177bf54cd..4095636fe5702 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -52,7 +52,10 @@ cdef class BlockPlacement: cdef: slice s = self._ensure_has_slice() - v = self._as_slice if s is not None else self._as_array + if s is not None: + v = self._as_slice + else: + v = self._as_array return f"{type(self).__name__}({v})" From c824540d16d8c28f16b4877592bd1c91ddb74b68 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 18 Mar 2020 14:58:17 +0200 Subject: [PATCH 4/9] Reverted if/else change REF: https://github.com/pandas-dev/pandas/pull/32801#discussion_r394308161 --- pandas/_libs/internals.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 4095636fe5702..fc8926035146f 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -68,8 +68,8 @@ cdef class BlockPlacement: if s is not None: return slice_len(s) - - return len(self._as_array) + else: + return len(self._as_array) def __iter__(self): cdef: From 571560ec91b557ae1cc27eaf8c97940dbca44298 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 18 Mar 2020 14:59:56 +0200 Subject: [PATCH 5/9] Reverted if/else changes REF: 
https://github.com/pandas-dev/pandas/pull/32801#discussion_r394308945 --- pandas/_libs/internals.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index fc8926035146f..cdc261310fa0f 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -170,7 +170,10 @@ cdef class BlockPlacement: if (step > 0 and start < 0) or (step < 0 and stop < step): raise ValueError("iadd causes length change") - val = slice(start, None, step) if stop < 0 else slice(start, stop, step) + if stop < 0: + val = slice(start, None, step) + else: + val = slice(start, stop, step) return BlockPlacement(val) else: From dbd016e95465aa7de486a9361c4428139f924f42 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Wed, 18 Mar 2020 18:30:32 +0200 Subject: [PATCH 6/9] Revert if/else statement REF: https://github.com/pandas-dev/pandas/pull/32801#discussion_r394474768 --- pandas/_libs/internals.pyx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index cdc261310fa0f..cdd8de1fa6f09 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -133,7 +133,10 @@ cdef class BlockPlacement: cdef: slice s = self._ensure_has_slice() - val = slice_getitem(s, loc) if s is not None else self._as_array[loc] + if s is not None: + val = slice_getitem(s, loc) + else: + val = self._as_array[loc] if not isinstance(val, slice) and val.ndim == 0: return val From 6223f3c0e2a008fa515aceea034a3e7d3ae5cb0c Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Thu, 19 Mar 2020 00:09:38 +0200 Subject: [PATCH 7/9] Make the comment two lines REF: https://github.com/pandas-dev/pandas/pull/32801#discussion_r394660909 --- pandas/_libs/internals.pyx | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 43143bd015f1b..2d5f6978cd147 100644 --- 
a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -114,9 +114,8 @@ cdef class BlockPlacement: if not self._has_array: start, stop, step, _ = slice_get_indices_ex(self._as_slice) - # NOTE: - # this is the C-optimized equivalent of - # `np.arange(start, stop, step, dtype=np.int64)` + # NOTE: this is the C-optimized equivalent of + # `np.arange(start, stop, step, dtype=np.int64)` self._as_array = cnp.PyArray_Arange(start, stop, step, NPY_INT64) self._has_array = True From ef276a0782685148c1a0928f9cb80e065490b229 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Thu, 19 Mar 2020 00:13:43 +0200 Subject: [PATCH 8/9] Revert some assignment REF: https://github.com/pandas-dev/pandas/pull/32801/files#r394661666 --- pandas/_libs/internals.pyx | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 2d5f6978cd147..8a132303d474e 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -380,16 +380,20 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True): # There's blkno in this function's name because it's used in block & # blockno handling. 
cdef: - Py_ssize_t i, diff, tot_len, stop - Py_ssize_t start = 0, n = blknos.shape[0] + int64_t cur_blkno + Py_ssize_t i, start, stop, n, diff, tot_len object blkno object group_dict = defaultdict(list) int64_t[:] res_view - int64_t cur_blkno = blknos[start] + + n = blknos.shape[0] if n == 0: return + start = 0 + cur_blkno = blknos[start] + if not group: for i in range(1, n): if blknos[i] != cur_blkno: From 6cd03f3fb4d8e6655ee3bbe1b657a8baf507765e Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sat, 21 Mar 2020 17:25:48 +0200 Subject: [PATCH 9/9] Revert if/else statement --- pandas/_libs/internals.pyx | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 1ab2e4eeb6bda..14ebbd73ca9bd 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -79,8 +79,8 @@ cdef class BlockPlacement: if s is not None: start, stop, step, _ = slice_get_indices_ex(s) return iter(range(start, stop, step)) - - return iter(self._as_array) + else: + return iter(self._as_array) @property def as_slice(self) -> slice: @@ -89,8 +89,8 @@ cdef class BlockPlacement: if s is not None: return s - - raise TypeError("Not slice-like") + else: + raise TypeError("Not slice-like") @property def indexer(self): @@ -99,8 +99,8 @@ cdef class BlockPlacement: if s is not None: return s - - return self._as_array + else: + return self._as_array def isin(self, arr): from pandas.core.indexes.api import Int64Index