Skip to content

Commit a609574

Browse files
committed
ENH: more refactoring for LongPanel removal, Index._join_level method, DataFrame.reindex_axis, #108
1 parent 61b3078 commit a609574

14 files changed

+288
-156
lines changed

TODO.rst

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,11 @@
1+
LongPanel removal
2+
=================
3+
4+
- level to flex methods
5+
- level to reindex
6+
- fast take for items
7+
8+
19
DONE
210
----
311
- SparseSeries name integration + tests

pandas/core/frame.py

+70-35
Original file line numberDiff line numberDiff line change
@@ -1295,12 +1295,13 @@ def _align_frame(self, other, join='outer', axis=None, level=None,
12951295
if axis is None or axis == 0:
12961296
if not self.index.equals(other.index):
12971297
join_index, ilidx, iridx = \
1298-
self.index.join(other.index, how=join, return_indexers=True)
1298+
self.index.join(other.index, how=join, level=level,
1299+
return_indexers=True)
12991300

13001301
if axis is None or axis == 1:
13011302
if not self.columns.equals(other.columns):
13021303
join_columns, clidx, cridx = \
1303-
self.columns.join(other.columns, how=join,
1304+
self.columns.join(other.columns, how=join, level=level,
13041305
return_indexers=True)
13051306

13061307
def _align(frame, row_idx, col_idx):
@@ -1310,7 +1311,8 @@ def _align(frame, row_idx, col_idx):
13101311

13111312
if col_idx is not None:
13121313
# TODO: speed up on homogeneous DataFrame objects
1313-
new_data = new_data.reindex_items(join_columns)
1314+
new_data = new_data.reindex_indexer(join_columns, col_idx,
1315+
axis=0)
13141316

13151317
if copy and new_data is frame._data:
13161318
new_data = new_data.copy()
@@ -1321,6 +1323,15 @@ def _align(frame, row_idx, col_idx):
13211323
right = _align(other, iridx, cridx)
13221324
return left, right
13231325

1326+
def _align_level(self, multi_index, level, axis=0, copy=True):
1327+
levnum = multi_index._get_level_number(level)
1328+
data = self.reindex_axis(multi_index.levels[levnum], axis=axis,
1329+
copy=False)._data
1330+
mgr_axis = 0 if axis == 1 else 1
1331+
new_data = data.reindex_indexer(multi_index, multi_index.labels[levnum],
1332+
axis=mgr_axis)
1333+
return DataFrame(new_data)
1334+
13241335
def _align_series(self, other, join='outer', axis=None, level=None,
13251336
copy=True):
13261337
fdata = self._data
@@ -1337,11 +1348,12 @@ def _align_series(self, other, join='outer', axis=None, level=None,
13371348
join_index = self.columns
13381349
lidx, ridx = None, None
13391350
if not self.columns.equals(other.index):
1340-
join_index, lidx, ridx = self.columns.join(other.index, how=join,
1341-
return_indexers=True)
1351+
join_index, lidx, ridx = \
1352+
self.columns.join(other.index, how=join,
1353+
return_indexers=True)
13421354

13431355
if lidx is not None:
1344-
fdata = fdata.reindex_items(join_index)
1356+
fdata = fdata.reindex_indexer(join_index, lidx, axis=0)
13451357
else:
13461358
raise ValueError('Must specify axis=0 or 1')
13471359

@@ -1354,7 +1366,7 @@ def _align_series(self, other, join='outer', axis=None, level=None,
13541366

13551367
def reindex(self, index=None, columns=None, method=None, level=None,
13561368
copy=True):
1357-
"""Conform Series to new index with optional filling logic, placing
1369+
"""Conform DataFrame to new index with optional filling logic, placing
13581370
NA/NaN in locations having no value in the previous index. A new object
13591371
is produced unless the new index is equivalent to the current one and
13601372
copy=False
@@ -1385,33 +1397,65 @@ def reindex(self, index=None, columns=None, method=None, level=None,
13851397
frame = self
13861398

13871399
if index is not None:
1388-
index = _ensure_index(index)
13891400
frame = frame._reindex_index(index, method, copy, level)
13901401

13911402
if columns is not None:
1392-
columns = _ensure_index(columns)
13931403
frame = frame._reindex_columns(columns, copy, level)
13941404

13951405
return frame
13961406

1407+
def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True):
1408+
"""Conform DataFrame to new index with optional filling logic, placing
1409+
NA/NaN in locations having no value in the previous index. A new object
1410+
is produced unless the new index is equivalent to the current one and
1411+
copy=False
1412+
1413+
Parameters
1414+
----------
1415+
labels : array-like
1416+
New labels / index to conform to. Preferably an Index object to
1417+
avoid duplicating data
1418+
axis : {0, 1}
1419+
0 -> index (rows)
1420+
1 -> columns
1421+
method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
1422+
Method to use for filling holes in reindexed DataFrame
1423+
pad / ffill: propagate last valid observation forward to next valid
1424+
backfill / bfill: use NEXT valid observation to fill gap
1425+
copy : boolean, default True
1426+
Return a new object, even if the passed indexes are the same
1427+
1428+
Examples
1429+
--------
1430+
>>> df.reindex_axis(['A', 'B', 'C'], axis=1)
1431+
1432+
See also
1433+
--------
1434+
DataFrame.reindex, DataFrame.reindex_like
1435+
1436+
Returns
1437+
-------
1438+
reindexed : same type as calling instance
1439+
"""
1440+
self._consolidate_inplace()
1441+
if axis == 0:
1442+
return self._reindex_index(labels, method, copy, level)
1443+
elif axis == 1:
1444+
return self._reindex_columns(labels, copy, level)
1445+
else: # pragma: no cover
1446+
raise ValueError('Must specify axis=0 or 1')
1447+
13971448
def _reindex_index(self, new_index, method, copy, level):
1398-
if new_index.equals(self.index):
1399-
if copy:
1400-
result = self.copy()
1401-
result.index = new_index
1402-
return result
1403-
else:
1404-
return self
1405-
new_data = self._data.reindex_axis(new_index, method, axis=1)
1449+
if level is not None:
1450+
return self._align_level(new_index, level, axis=0, copy=copy)
1451+
new_data = self._data.reindex_axis(new_index, method, axis=1,
1452+
copy=copy)
14061453
return self._constructor(new_data)
14071454

1408-
def _reindex_columns(self, new_columns, copy):
1409-
if new_columns.equals(self.columns):
1410-
if copy:
1411-
return self.copy()
1412-
else:
1413-
return self
1414-
new_data = self._data.reindex_axis(new_columns, axis=0)
1455+
def _reindex_columns(self, new_columns, copy, level):
1456+
if level is not None:
1457+
return self._align_level(new_columns, level, axis=1, copy=copy)
1458+
new_data = self._data.reindex_axis(new_columns, axis=0, copy=copy)
14151459
return self._constructor(new_data)
14161460

14171461
def reindex_like(self, other, method=None, copy=True):
@@ -1948,7 +1992,8 @@ def _indexed_same(self, other):
19481992
same_columns = self.columns.equals(other.columns)
19491993
return same_index and same_columns
19501994

1951-
def _combine_series(self, other, func, fill_value=None, axis=None):
1995+
def _combine_series(self, other, func, fill_value=None, axis=None,
1996+
level=None):
19521997
if axis is not None:
19531998
axis = self._get_axis_name(axis)
19541999
if axis == 'index':
@@ -3659,16 +3704,6 @@ def _is_sequence(x):
36593704
except Exception:
36603705
return False
36613706

3662-
def _align_level(frame, multi_index, level, axis=0):
3663-
levnum = multi_index._get_level_number(level)
3664-
3665-
data = frame.reindex(multi_index.levels[levnum], copy=False)._data
3666-
3667-
mgr_axis = 0 if axis == 1 else 1
3668-
new_data = data.reindex_indexer(multi_index, multi_index.labels[levnum],
3669-
axis=mgr_axis)
3670-
return DataFrame(new_data)
3671-
36723707
def install_ipython_completers(): # pragma: no cover
36733708
"""Register the DataFrame type with IPython's tab completion machinery, so
36743709
that it knows about accessing column names as attributes."""

pandas/core/generic.py

+6-10
Original file line numberDiff line numberDiff line change
@@ -370,17 +370,13 @@ def _is_mixed_type(self):
370370
return len(self._data.blocks) > 1
371371

372372
def _reindex_axis(self, new_index, fill_method, axis, copy):
373-
new_index = _ensure_index(new_index)
374-
cur_axis = self._data.axes[axis]
375-
if cur_axis.equals(new_index) and not copy:
376-
return self
373+
new_data = self._data.reindex_axis(new_index, axis=axis,
374+
method=fill_method, copy=copy)
377375

378-
if axis == 0:
379-
new_data = self._data.reindex_items(new_index)
376+
if new_data is self._data and not copy:
377+
return self
380378
else:
381-
new_data = self._data.reindex_axis(new_index, axis=axis,
382-
method=fill_method)
383-
return self._constructor(new_data)
379+
return self._constructor(new_data)
384380

385381
def cumsum(self, axis=None, skipna=True):
386382
"""
@@ -586,7 +582,7 @@ def take(self, indices, axis=0):
586582
if axis == 0:
587583
labels = self._get_axis(axis)
588584
new_items = labels.take(indices)
589-
new_data = self._data.reindex_items(new_items)
585+
new_data = self._data.reindex_axis(new_items, axis=0)
590586
else:
591587
new_data = self._data.take(indices, axis=axis)
592588
return self._constructor(new_data)

pandas/core/index.py

+78-2
Original file line numberDiff line numberDiff line change
@@ -559,11 +559,32 @@ def reindex(self, target, method=None):
559559
indexer = self.get_indexer(target, method=method)
560560
return target, indexer
561561

562-
def join(self, other, how='left', return_indexers=False):
562+
def join(self, other, how='left', level=None, return_indexers=False):
563+
"""
564+
Internal API method. Compute join_index and indexers to conform data
565+
structures to the new index.
566+
567+
Parameters
568+
----------
569+
other : Index
570+
how : {'left', 'right', 'inner', 'outer'}
571+
level : int or level name, default None (level of the MultiIndex side to join on)
572+
return_indexers : boolean, default False
573+
574+
Returns
575+
-------
576+
join_index, or (join_index, left_indexer, right_indexer) if return_indexers=True
577+
"""
578+
if (level is not None and (isinstance(self, MultiIndex) or
579+
isinstance(other, MultiIndex))):
580+
return self._join_level(other, level, how=how,
581+
return_indexers=return_indexers)
582+
563583
if self.dtype != other.dtype:
564584
this = self.astype('O')
565585
other = other.astype('O')
566-
return this.join(other, how=how, return_indexers=return_indexers)
586+
return this.join(other, how=how,
587+
return_indexers=return_indexers)
567588

568589
if self.is_monotonic and other.is_monotonic:
569590
return self._join_monotonic(other, how=how,
@@ -593,6 +614,61 @@ def join(self, other, how='left', return_indexers=False):
593614
else:
594615
return join_index
595616

617+
def _join_level(self, other, level, how='left', return_indexers=False):
618+
"""
619+
The join method *only* affects the level of the resulting
620+
MultiIndex. Otherwise it just exactly aligns the Index data to the
621+
labels of the level in the MultiIndex. The order of the data indexed by
622+
the MultiIndex will not be changed (currently)
623+
"""
624+
625+
if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
626+
raise Exception('Join on level between two MultiIndex objects '
627+
'is ambiguous')
628+
629+
left, right = self, other
630+
631+
flip_order = not isinstance(self, MultiIndex)
632+
if flip_order:
633+
left, right = right, left
634+
635+
level = left._get_level_number(level)
636+
637+
old_level = left.levels[level]
638+
639+
new_level, left_lev_indexer, right_lev_indexer = \
640+
old_level.join(right, how=how, return_indexers=True)
641+
642+
if left_lev_indexer is not None:
643+
rev_indexer = lib.get_reverse_indexer(left_lev_indexer,
644+
len(old_level))
645+
646+
new_labels = list(left.labels)
647+
new_labels[level] = rev_indexer.take(left.labels[level])
648+
649+
new_levels = list(left.levels)
650+
new_levels[level] = new_level
651+
652+
join_index = MultiIndex(levels=new_levels, labels=new_labels,
653+
names=left.names)
654+
else:
655+
join_index = left
656+
657+
left_indexer = None
658+
659+
if right_lev_indexer is not None:
660+
right_indexer = right_lev_indexer.take(join_index.labels[level])
661+
else:
662+
right_indexer = join_index.labels[level]
663+
664+
if flip_order:
665+
left_indexer, right_indexer = right_indexer, left_indexer
666+
667+
if return_indexers:
668+
return join_index, left_indexer, right_indexer
669+
else:
670+
return join_index
671+
596672
def _join_monotonic(self, other, how='left', return_indexers=False):
597673
if how == 'left':
598674
join_index = self

0 commit comments

Comments
 (0)