Skip to content

Commit 6d7686a

Browse files
committed
CLN: drop internals._invert_reordering in favour of lib.get_reverse_indexer
1 parent 9b394e2 commit 6d7686a

File tree

4 files changed

+20
-62
lines changed

4 files changed

+20
-62
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,8 @@ Improvements to existing features
295295
- ``read_excel`` can now read milliseconds in Excel dates and times with xlrd >= 0.9.3. (:issue:`5945`)
296296
- ``pivot_table`` can now accept ``Grouper`` by ``index`` and ``columns`` keywords (:issue:`6913`)
297297
- Improved performance of compatible pickles (:issue:`6899`)
298+
- Refactor Block classes removing ``Block.items`` attributes to avoid duplication
299+
in item handling (:issue:`6745`, :issue:`6988`).
298300

299301
.. _release.bug_fixes-0.14.0:
300302

doc/source/v0.14.0.txt

+6
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,12 @@ users upgrade to this version.
2727

2828
- :ref:`Bug Fixes <release.bug_fixes-0.14.0>`
2929

30+
.. warning::
31+
32+
In 0.14.0 all ``NDFrame`` based containers have undergone significant internal refactoring. Before that each block of
33+
homogeneous data had its own labels and extra care was necessary to keep those in sync with the parent container's labels.
34+
As stated, the refactoring is internal and no publicly visible changes should happen.
35+
3036
.. _whatsnew_0140.api:
3137

3238
API changes

pandas/core/internals.py

+1-62
Original file line numberDiff line numberDiff line change
@@ -2316,7 +2316,7 @@ def combine(self, blocks, copy=True):
23162316

23172317
# FIXME: optimization potential
23182318
indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks]))
2319-
inv_indexer = _invert_reordering(indexer)
2319+
inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0])
23202320
new_items = self.items.take(indexer)
23212321

23222322
new_blocks = []
@@ -3506,71 +3506,10 @@ def _possibly_compare(a, b, op):
35063506
return res
35073507

35083508

3509-
3510-
35113509
def _concat_indexes(indexes):
35123510
return indexes[0].append(indexes[1:])
35133511

35143512

3515-
def _invert_reordering(reordering, minlength=None):
3516-
"""
3517-
Invert reordering operation.
3518-
3519-
Given array `reordering`, make `reordering_inv` of it, such that::
3520-
3521-
reordering_inv[reordering[x]] = x
3522-
3523-
There are two types of indexers:
3524-
3525-
source
3526-
is when element *s* at position *i* means that values to fill *i-th*
3527-
item of reindex operation should be taken from *s-th* item of the
3528-
original (this is what is returned by `pandas.Index.reindex`).
3529-
destination
3530-
is when element *d* at position *i* means that values from *i-th* item
3531-
of source should be used to fill *d-th* item of reindexing operation.
3532-
3533-
This function will convert from *source* to *destination* and vice-versa.
3534-
3535-
.. note:: trailing ``-1`` may be lost upon conversion (this is what
3536-
`minlength` is there for).
3537-
3538-
.. note:: if *source* indexer is not unique, corresponding *destination*
3539-
indexer will have ``dtype=object`` and will contain lists.
3540-
3541-
Examples:
3542-
3543-
>>> _invert_reordering([3, -1, 2, 4, -1])
3544-
array([-1, -1, 2, 0, 3])
3545-
>>> _invert_reordering([-1, -1, 0, 2, 3])
3546-
array([3, -1, 2, 4])
3547-
>>> _invert_reordering([1,3,5])
3548-
array([-1, 0, -1, 1, -1, 2])
3549-
3550-
"""
3551-
reordering = np.asanyarray(reordering, dtype=np.int64)
3552-
if not com.is_integer_dtype(reordering):
3553-
raise ValueError("Only integer indexers are supported")
3554-
3555-
nonneg_indices = reordering[reordering >= 0].astype(np.int_)
3556-
counts = np.bincount(nonneg_indices, minlength=minlength)
3557-
has_non_unique = (counts > 1).any()
3558-
3559-
dtype = np.dtype(np.object_) if has_non_unique else np.dtype(np.int64)
3560-
inverted = np.empty_like(counts, dtype=dtype)
3561-
inverted.fill(-1)
3562-
3563-
nonneg_positions = np.arange(len(reordering), dtype=np.int64)[reordering >= 0]
3564-
np.put(inverted, nonneg_indices, nonneg_positions)
3565-
3566-
if has_non_unique:
3567-
nonunique_elements = np.arange(len(counts))[counts > 1]
3568-
for elt in nonunique_elements:
3569-
inverted[elt] = nonneg_positions[nonneg_indices == elt].tolist()
3570-
3571-
return inverted
3572-
3573-
35743513
def _get_blkno_placements(blknos, blk_count, group=True):
35753514
"""
35763515

pandas/lib.pyx

+11
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,17 @@ def fast_zip(list ndarrays):
495495
return result
496496

497497
def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length):
498+
"""
499+
Reverse indexing operation.
500+
501+
Given `indexer`, make `indexer_inv` of it, such that::
502+
503+
indexer_inv[indexer[x]] = x
504+
505+
.. note:: If indexer is not unique, only the first occurrence is accounted for.
506+
507+
"""
508+
498509
cdef:
499510
Py_ssize_t i, n = len(indexer)
500511
ndarray[int64_t] rev_indexer

0 commit comments

Comments
 (0)