From 6d7686a6a2e57da31d08beebb2594119aeeb36cf Mon Sep 17 00:00:00 2001 From: immerrr Date: Mon, 28 Apr 2014 13:11:28 +0400 Subject: [PATCH] CLN: drop internals._invert_reordering in favour of lib.get_reverse_indexer --- doc/source/release.rst | 2 ++ doc/source/v0.14.0.txt | 6 ++++ pandas/core/internals.py | 63 +--------------------------------------- pandas/lib.pyx | 11 +++++++ 4 files changed, 20 insertions(+), 62 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 38e95eaba0b0f..91e523cfc882c 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -295,6 +295,8 @@ Improvements to existing features - ``read_excel`` can now read milliseconds in Excel dates and times with xlrd >= 0.9.3. (:issue:`5945`) - ``pivot_table`` can now accept ``Grouper`` by ``index`` and ``columns`` keywords (:issue:`6913`) - Improved performance of compatible pickles (:issue:`6899`) +- Refactor Block classes removing ``Block.items`` attributes to avoid duplication + in item handling (:issue:`6745`, :issue:`6988`). .. _release.bug_fixes-0.14.0: diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 43096b133f26e..2c29804cbafa0 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -27,6 +27,12 @@ users upgrade to this version. - :ref:`Bug Fixes ` +.. warning:: + + In 0.14.0 all ``NDFrame``-based containers have undergone significant internal refactoring. Before that, each block of + homogeneous data had its own labels and extra care was necessary to keep those in sync with the parent container's labels. + As stated, the refactoring is internal and no publicly visible changes should happen. + ..
_whatsnew_0140.api: API changes diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 887f7562421d7..48d047baaa6c0 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -2316,7 +2316,7 @@ def combine(self, blocks, copy=True): # FIXME: optimization potential indexer = np.sort(np.concatenate([b.mgr_locs.as_array for b in blocks])) - inv_indexer = _invert_reordering(indexer) + inv_indexer = lib.get_reverse_indexer(indexer, self.shape[0]) new_items = self.items.take(indexer) new_blocks = [] @@ -3506,71 +3506,10 @@ def _possibly_compare(a, b, op): return res - - def _concat_indexes(indexes): return indexes[0].append(indexes[1:]) -def _invert_reordering(reordering, minlength=None): - """ - Invert reordering operation. - - Given array `reordering`, make `reordering_inv` of it, such that:: - - reordering_inv[reordering[x]] = x - - There are two types of indexers: - - source - is when element *s* at position *i* means that values to fill *i-th* - item of reindex operation should be taken from *s-th* item of the - original (this is what is returned by `pandas.Index.reindex`). - destination - is when element *d* at position *i* means that values from *i-th* item - of source should be used to fill *d-th* item of reindexing operation. - - This function will convert from *source* to *destination* and vice-versa. - - .. note:: trailing ``-1`` may be lost upon conversion (this is what - `minlength` is there for). - - .. note:: if *source* indexer is not unique, corresponding *destination* - indexer will have ``dtype=object`` and will contain lists. 
- - Examples: - - >>> _invert_reordering([3, -1, 2, 4, -1]) - array([-1, -1, 2, 0, 3]) - >>> _invert_reordering([-1, -1, 0, 2, 3]) - array([3, -1, 2, 4]) - >>> _invert_reordering([1,3,5]) - array([-1, 0, -1, 1, -1, 2]) - - """ - reordering = np.asanyarray(reordering, dtype=np.int64) - if not com.is_integer_dtype(reordering): - raise ValueError("Only integer indexers are supported") - - nonneg_indices = reordering[reordering >= 0].astype(np.int_) - counts = np.bincount(nonneg_indices, minlength=minlength) - has_non_unique = (counts > 1).any() - - dtype = np.dtype(np.object_) if has_non_unique else np.dtype(np.int64) - inverted = np.empty_like(counts, dtype=dtype) - inverted.fill(-1) - - nonneg_positions = np.arange(len(reordering), dtype=np.int64)[reordering >= 0] - np.put(inverted, nonneg_indices, nonneg_positions) - - if has_non_unique: - nonunique_elements = np.arange(len(counts))[counts > 1] - for elt in nonunique_elements: - inverted[elt] = nonneg_positions[nonneg_indices == elt].tolist() - - return inverted - - def _get_blkno_placements(blknos, blk_count, group=True): """ diff --git a/pandas/lib.pyx b/pandas/lib.pyx index c7494c2f4344f..53c4e0a44e8e9 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -495,6 +495,17 @@ def fast_zip(list ndarrays): return result def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length): + """ + Reverse indexing operation. + + Given `indexer`, make `indexer_inv` of it, such that:: + + indexer_inv[indexer[x]] = x + + .. note:: If indexer is not unique, only the first occurrence is accounted for. + + """ + cdef: Py_ssize_t i, n = len(indexer) ndarray[int64_t] rev_indexer