-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
CLN/STY: pandas/_libs/internals.pyx #32801
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
ac115a2
67a592f
8ea73ff
c824540
571560e
dbd016e
efd2e18
6223f3c
ef276a0
b06587e
9359c5e
d80b3df
6cd03f3
3e285c8
cc3bd20
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,7 +20,6 @@ cdef class BlockPlacement: | |
cdef: | ||
slice _as_slice | ||
object _as_array | ||
|
||
bint _has_slice, _has_array, _is_known_slice_like | ||
|
||
def __init__(self, val): | ||
|
@@ -52,19 +51,21 @@ cdef class BlockPlacement: | |
def __str__(self) -> str: | ||
cdef: | ||
slice s = self._ensure_has_slice() | ||
|
||
if s is not None: | ||
v = self._as_slice | ||
else: | ||
v = self._as_array | ||
|
||
return f'{type(self).__name__}({v})' | ||
return f"{type(self).__name__}({v})" | ||
|
||
def __repr__(self) -> str: | ||
return str(self) | ||
|
||
def __len__(self) -> int: | ||
cdef: | ||
slice s = self._ensure_has_slice() | ||
|
||
if s is not None: | ||
return slice_len(s) | ||
else: | ||
|
@@ -74,59 +75,65 @@ cdef class BlockPlacement: | |
cdef: | ||
slice s = self._ensure_has_slice() | ||
Py_ssize_t start, stop, step, _ | ||
|
||
if s is not None: | ||
start, stop, step, _ = slice_get_indices_ex(s) | ||
return iter(range(start, stop, step)) | ||
else: | ||
return iter(self._as_array) | ||
|
||
return iter(self._as_array) | ||
|
||
@property | ||
def as_slice(self) -> slice: | ||
cdef: | ||
slice s = self._ensure_has_slice() | ||
if s is None: | ||
raise TypeError('Not slice-like') | ||
else: | ||
|
||
if s is not None: | ||
return s | ||
|
||
raise TypeError("Not slice-like") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also this change I personally don't find an improvement in code flow |
||
|
||
@property | ||
def indexer(self): | ||
cdef: | ||
slice s = self._ensure_has_slice() | ||
|
||
if s is not None: | ||
return s | ||
else: | ||
return self._as_array | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jorisvandenbossche This is a very similar change to the change that was mentioned here, would you like me to revert that as well? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To be clear, my opinion is only a single one, but yes, I would personally revert this as well |
||
|
||
return self._as_array | ||
|
||
def isin(self, arr): | ||
from pandas.core.indexes.api import Int64Index | ||
|
||
return Int64Index(self.as_array, copy=False).isin(arr) | ||
|
||
@property | ||
def as_array(self): | ||
cdef: | ||
Py_ssize_t start, stop, end, _ | ||
|
||
if not self._has_array: | ||
start, stop, step, _ = slice_get_indices_ex(self._as_slice) | ||
# NOTE: this is the C-optimized equivalent of | ||
# np.arange(start, stop, step, dtype=np.int64) | ||
# NOTE: | ||
# this is the C-optimized equivalent of | ||
# `np.arange(start, stop, step, dtype=np.int64)` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. not a deal-breaker but i dont see how this is an improvement. especially if the comment is going to be repeated in multiple places id rather it be 2 lines than 3 |
||
self._as_array = cnp.PyArray_Arange(start, stop, step, NPY_INT64) | ||
self._has_array = True | ||
|
||
return self._as_array | ||
|
||
@property | ||
def is_slice_like(self) -> bool: | ||
cdef: | ||
slice s = self._ensure_has_slice() | ||
|
||
return s is not None | ||
|
||
def __getitem__(self, loc): | ||
cdef: | ||
slice s = self._ensure_has_slice() | ||
if s is not None: | ||
val = slice_getitem(s, loc) | ||
else: | ||
val = self._as_array[loc] | ||
|
||
val = slice_getitem(s, loc) if s is not None else self._as_array[loc] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jorisvandenbossche This is a very similar change to the change that was mentioned here, would you like me to revert that as well? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is different from the other places where you left this comment, and this is the only one i would ask you to revert. The way it is currently written, I can look in coverage output to see if both branches are reached; I can't with this PR's version. |
||
|
||
if not isinstance(val, slice) and val.ndim == 0: | ||
return val | ||
|
@@ -137,11 +144,12 @@ cdef class BlockPlacement: | |
return BlockPlacement(np.delete(self.as_array, loc, axis=0)) | ||
|
||
def append(self, others): | ||
if len(others) == 0: | ||
if not len(others): | ||
return self | ||
|
||
return BlockPlacement(np.concatenate([self.as_array] + | ||
[o.as_array for o in others])) | ||
return BlockPlacement( | ||
np.concatenate([self.as_array] + [o.as_array for o in others]) | ||
) | ||
|
||
cdef iadd(self, other): | ||
cdef: | ||
|
@@ -159,8 +167,7 @@ cdef class BlockPlacement: | |
start += other_int | ||
stop += other_int | ||
|
||
if ((step > 0 and start < 0) or | ||
(step < 0 and stop < step)): | ||
if (step > 0 and start < 0) or (step < 0 and stop < step): | ||
raise ValueError("iadd causes length change") | ||
|
||
if stop < 0: | ||
|
@@ -187,6 +194,7 @@ cdef class BlockPlacement: | |
if not self._has_slice: | ||
self._as_slice = indexer_as_slice(self._as_array) | ||
self._has_slice = True | ||
|
||
return self._as_slice | ||
|
||
|
||
|
@@ -236,8 +244,7 @@ cdef slice slice_canonize(slice s): | |
return slice(start, stop, step) | ||
|
||
|
||
cpdef Py_ssize_t slice_len( | ||
slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: | ||
cpdef Py_ssize_t slice_len(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX) except -1: | ||
""" | ||
Get length of a bounded slice. | ||
|
||
|
@@ -254,8 +261,7 @@ cpdef Py_ssize_t slice_len( | |
if slc is None: | ||
raise TypeError("slc must be slice") | ||
|
||
PySlice_GetIndicesEx(slc, objlen, | ||
&start, &stop, &step, &length) | ||
PySlice_GetIndicesEx(slc, objlen, &start, &stop, &step, &length) | ||
|
||
return length | ||
|
||
|
@@ -273,8 +279,7 @@ cdef slice_get_indices_ex(slice slc, Py_ssize_t objlen=PY_SSIZE_T_MAX): | |
if slc is None: | ||
raise TypeError("slc should be a slice") | ||
|
||
PySlice_GetIndicesEx(slc, objlen, | ||
&start, &stop, &step, &length) | ||
PySlice_GetIndicesEx(slc, objlen, &start, &stop, &step, &length) | ||
|
||
return start, stop, step, length | ||
|
||
|
@@ -370,22 +375,17 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True): | |
# There's blkno in this function's name because it's used in block & | ||
# blockno handling. | ||
cdef: | ||
int64_t cur_blkno | ||
Py_ssize_t i, start, stop, n, diff | ||
|
||
Py_ssize_t i, diff, tot_len, stop | ||
Py_ssize_t start = 0, n = blknos.shape[0] | ||
object blkno | ||
object group_dict = defaultdict(list) | ||
int64_t[:] res_view | ||
|
||
n = blknos.shape[0] | ||
int64_t cur_blkno = blknos[start] | ||
|
||
if n == 0: | ||
return | ||
|
||
start = 0 | ||
cur_blkno = blknos[start] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doing this here instead of above means we can avoid the assignment in the n==0 case |
||
|
||
if group is False: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From my benchmarking it's actually the other way around:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. look at the generated C code. The "is" should be a pointer comparison, whereas the other one should have to do some more work. |
||
if not group: | ||
for i in range(1, n): | ||
if blknos[i] != cur_blkno: | ||
yield cur_blkno, slice(start, i) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@jorisvandenbossche This is a very similar change to the change that was mentioned here, would you like me to revert that as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
same here (see comment below)