Skip to content

Commit 4b6b49f

Browse files
committed
ENH: concat with keys for MultiIndex works. A bit more testing / docs required
1 parent 1317f58 commit 4b6b49f

File tree

3 files changed

+79
-21
lines changed

3 files changed

+79
-21
lines changed

pandas/core/index.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ class Index(np.ndarray):
5454
_backfill = lib.backfill_object
5555

5656
name = None
57+
5758
def __new__(cls, data, dtype=None, copy=False, name=None):
5859
if isinstance(data, np.ndarray):
5960
if dtype is None and issubclass(data.dtype.type, np.integer):

pandas/tools/merge.py

Lines changed: 42 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66

77
from pandas.core.frame import DataFrame, _merge_doc
88
from pandas.core.groupby import get_group_index
9-
from pandas.core.index import Index, MultiIndex, _get_combined_index
9+
from pandas.core.index import (Index, MultiIndex, _get_combined_index,
10+
_ensure_index)
1011
from pandas.core.internals import (IntBlock, BoolBlock, BlockManager,
1112
make_block, _consolidate)
1213
from pandas.util.decorators import cache_readonly, Appender, Substitution
@@ -588,25 +589,31 @@ def _get_all_block_kinds(blockmaps):
588589
# Concatenate DataFrame objects
589590

590591
def concat(objs, axis=0, join='outer', join_axes=None, ignore_index=False,
591-
keys=None, names=None, levels=None, verify_integrity=False):
592+
keys=None, levels=None, names=None, verify_integrity=False):
592593
"""
593-
Concatenate DataFrame objects row or column wise
594+
Concatenate pandas objects along a particular axis with optional set logic
595+
along the other axes. Can also add a layer of hierarchical indexing on the
596+
concatenation axis, which may be useful if the labels are the same (or
597+
overlapping) on the passed axis number
594598
595599
Parameters
596600
----------
597-
objs : list of DataFrame objects
598-
axis : {0, 1}, default 0
601+
objs : list of DataFrame (or other pandas) objects
602+
axis : {0, 1, ...}, default 0
599603
The axis to concatenate along
600604
join : {'inner', 'outer'}, default 'outer'
601605
How to handle indexes on other axis(es)
602606
join_index : index-like
603607
verify_integrity : boolean, default False
604608
Check whether the new concatenated axis contains duplicates. This can
605609
be very expensive relative to the actual data concatenation
610+
keys : sequence-like or list of sequences
611+
levels :
612+
names :
606613
607614
Returns
608615
-------
609-
concatenated : DataFrame
616+
concatenated : type of objects
610617
"""
611618
op = _Concatenator(objs, axis=axis, join_axes=join_axes,
612619
ignore_index=ignore_index, join=join,
@@ -722,8 +729,16 @@ def _concat_blocks(self, blocks):
722729
'DataFrames')
723730
return make_block(concat_values, blocks[0].items, self.new_axes[0])
724731
else:
725-
concat_items = _concat_indexes([b.items for b in blocks])
726-
# TODO: maybe want to "take" from the new columns?
732+
all_items = [b.items for b in blocks]
733+
if self.axis == 0 and self.keys is not None:
734+
offsets = np.r_[0, [len(x._data.axes[self.axis]) for
735+
x in self.objs]]
736+
indexer = np.concatenate([offsets[i] + b.ref_locs
737+
for i, b in enumerate(blocks)])
738+
concat_items = self.new_axes[0].take(indexer)
739+
else:
740+
concat_items = _concat_indexes(all_items)
741+
727742
return make_block(concat_values, concat_items, self.new_axes[0])
728743

729744
def _concat_single_item(self, item):
@@ -837,18 +852,23 @@ def _concat_frames_hierarchical(frames, keys, names, levels, axis=0):
837852
def _concat_indexes(indexes):
838853
return indexes[0].append(indexes[1:])
839854

840-
def _make_concat_multiindex(indexes, keys, levels, names):
841-
single_level = len(levels) == 1
855+
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
856+
single_level = levels is None or len(levels) == 1
857+
858+
if single_level:
859+
zipped = [keys]
860+
if names is None:
861+
names = [None]
862+
else:
863+
zipped = zip(*keys)
864+
if names is None:
865+
names = [None] * len(keys)
842866

843867
if not _all_indexes_same(indexes):
844868
label_list = []
845869

846870
# things are potentially different sizes, so compute the exact labels
847871
# for each level and pass those to MultiIndex.from_arrays
848-
if single_level:
849-
zipped = [keys]
850-
else:
851-
zipped = zip(*keys)
852872

853873
for hlevel in zipped:
854874
to_concat = []
@@ -874,20 +894,21 @@ def _make_concat_multiindex(indexes, keys, levels, names):
874894

875895
names.append(indexes[0].name)
876896

877-
new_levels = list(levels)
897+
if levels is None:
898+
if single_level:
899+
new_levels = [_ensure_index(keys)]
900+
else:
901+
new_levels = [_ensure_index(k) for k in keys]
902+
else:
903+
new_levels = list(levels)
878904

879905
# do something a bit more speedy
880906
new_levels.append(new_index)
881907

882908
# construct labels
883909
labels = []
884910

885-
if single_level:
886-
zipped = [keys]
887-
else:
888-
zipped = zip(*keys)
889-
890-
for hlevel, level in zip(zipped, levels):
911+
for hlevel, level in zip(zipped, new_levels[:-1]):
891912
mapped = level.get_indexer(hlevel)
892913
labels.append(np.repeat(mapped, n))
893914

pandas/tools/tests/test_merge.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,42 @@ def test_append_missing_column_proper_upcast(self):
742742
self.assert_(appended['B'].dtype == 'O')
743743

744744
def test_concat_with_group_keys(self):
745+
df = DataFrame(np.random.randn(4, 3))
746+
df2 = DataFrame(np.random.randn(4, 4))
747+
748+
# axis=0
749+
df = DataFrame(np.random.randn(3, 4))
750+
df2 = DataFrame(np.random.randn(4, 4))
751+
752+
result = concat([df, df2], keys=[0, 1])
753+
exp_index = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1, 1],
754+
[0, 1, 2, 0, 1, 2, 3]])
755+
expected = DataFrame(np.r_[df.values, df2.values],
756+
index=exp_index)
757+
tm.assert_frame_equal(result, expected)
758+
759+
result = concat([df, df], keys=[0, 1])
760+
exp_index2 = MultiIndex.from_arrays([[0, 0, 0, 1, 1, 1],
761+
[0, 1, 2, 0, 1, 2]])
762+
expected = DataFrame(np.r_[df.values, df.values],
763+
index=exp_index2)
764+
tm.assert_frame_equal(result, expected)
765+
766+
# axis=1
767+
df = DataFrame(np.random.randn(4, 3))
768+
df2 = DataFrame(np.random.randn(4, 4))
769+
770+
result = concat([df, df2], keys=[0, 1], axis=1)
771+
expected = DataFrame(np.c_[df.values, df2.values],
772+
columns=exp_index)
773+
tm.assert_frame_equal(result, expected)
774+
775+
result = concat([df, df], keys=[0, 1], axis=1)
776+
expected = DataFrame(np.c_[df.values, df.values],
777+
columns=exp_index2)
778+
tm.assert_frame_equal(result, expected)
779+
780+
def test_concat_keys_and_levels(self):
745781
pass
746782

747783
def test_crossed_dtypes_weird_corner(self):

0 commit comments

Comments
 (0)