6
6
7
7
from pandas .core .frame import DataFrame , _merge_doc
8
8
from pandas .core .groupby import get_group_index
9
- from pandas .core .index import Index , MultiIndex , _get_combined_index
9
+ from pandas .core .index import (Index , MultiIndex , _get_combined_index ,
10
+ _ensure_index )
10
11
from pandas .core .internals import (IntBlock , BoolBlock , BlockManager ,
11
12
make_block , _consolidate )
12
13
from pandas .util .decorators import cache_readonly , Appender , Substitution
@@ -588,25 +589,31 @@ def _get_all_block_kinds(blockmaps):
588
589
# Concatenate DataFrame objects
589
590
590
591
def concat (objs , axis = 0 , join = 'outer' , join_axes = None , ignore_index = False ,
591
- keys = None , names = None , levels = None , verify_integrity = False ):
592
+ keys = None , levels = None , names = None , verify_integrity = False ):
592
593
"""
593
- Concatenate DataFrame objects row or column wise
594
+ Concatenate pandas objects along a particular axis with optional set logic
595
+ along the other axes. Can also add a layer of hierarchical indexing on the
596
+ concatenation axis, which may be useful if the labels are the same (or
597
+ overlapping) on the passed axis number
594
598
595
599
Parameters
596
600
----------
597
- objs : list of DataFrame objects
598
- axis : {0, 1}, default 0
601
+ objs : list of DataFrame (or other pandas) objects
602
+ axis : {0, 1, ... }, default 0
599
603
The axis to concatenate along
600
604
join : {'inner', 'outer'}, default 'outer'
601
605
How to handle indexes on other axis(es)
602
606
join_index : index-like
603
607
verify_integrity : boolean, default False
604
608
Check whether the new concatenated axis contains duplicates. This can
605
609
be very expensive relative to the actual data concatenation
610
+ keys : sequence-like or list of sequences
611
+ levels :
612
+ names :
606
613
607
614
Returns
608
615
-------
609
- concatenated : DataFrame
616
+ concatenated : type of objects
610
617
"""
611
618
op = _Concatenator (objs , axis = axis , join_axes = join_axes ,
612
619
ignore_index = ignore_index , join = join ,
@@ -722,8 +729,16 @@ def _concat_blocks(self, blocks):
722
729
'DataFrames' )
723
730
return make_block (concat_values , blocks [0 ].items , self .new_axes [0 ])
724
731
else :
725
- concat_items = _concat_indexes ([b .items for b in blocks ])
726
- # TODO: maybe want to "take" from the new columns?
732
+ all_items = [b .items for b in blocks ]
733
+ if self .axis == 0 and self .keys is not None :
734
+ offsets = np .r_ [0 , [len (x ._data .axes [self .axis ]) for
735
+ x in self .objs ]]
736
+ indexer = np .concatenate ([offsets [i ] + b .ref_locs
737
+ for i , b in enumerate (blocks )])
738
+ concat_items = self .new_axes [0 ].take (indexer )
739
+ else :
740
+ concat_items = _concat_indexes (all_items )
741
+
727
742
return make_block (concat_values , concat_items , self .new_axes [0 ])
728
743
729
744
def _concat_single_item (self , item ):
@@ -837,18 +852,23 @@ def _concat_frames_hierarchical(frames, keys, names, levels, axis=0):
837
852
def _concat_indexes(indexes):
    """
    Join a series of index objects end to end into a single index.

    Parameters
    ----------
    indexes : iterable of index-like objects
        Must contain at least one element. The first element's ``append``
        method is called with a list of all remaining elements.

    Returns
    -------
    concatenated : whatever ``indexes[0].append`` returns

    Raises
    ------
    IndexError
        If ``indexes`` is empty.
    """
    # Materialize first so iterators/generators can be indexed and sliced;
    # a list input behaves exactly as before.
    indexes = list(indexes)
    return indexes[0].append(indexes[1:])
839
854
840
- def _make_concat_multiindex (indexes , keys , levels , names ):
841
- single_level = len (levels ) == 1
855
+ def _make_concat_multiindex (indexes , keys , levels = None , names = None ):
856
+ single_level = levels is None or len (levels ) == 1
857
+
858
+ if single_level :
859
+ zipped = [keys ]
860
+ if names is None :
861
+ names = [None ]
862
+ else :
863
+ zipped = zip (* keys )
864
+ if names is None :
865
+ names = [None ] * len (keys )
842
866
843
867
if not _all_indexes_same (indexes ):
844
868
label_list = []
845
869
846
870
# things are potentially different sizes, so compute the exact labels
847
871
# for each level and pass those to MultiIndex.from_arrays
848
- if single_level :
849
- zipped = [keys ]
850
- else :
851
- zipped = zip (* keys )
852
872
853
873
for hlevel in zipped :
854
874
to_concat = []
@@ -874,20 +894,21 @@ def _make_concat_multiindex(indexes, keys, levels, names):
874
894
875
895
names .append (indexes [0 ].name )
876
896
877
- new_levels = list (levels )
897
+ if levels is None :
898
+ if single_level :
899
+ new_levels = [_ensure_index (keys )]
900
+ else :
901
+ new_levels = [_ensure_index (k ) for k in keys ]
902
+ else :
903
+ new_levels = list (levels )
878
904
879
905
# do something a bit more speedy
880
906
new_levels .append (new_index )
881
907
882
908
# construct labels
883
909
labels = []
884
910
885
- if single_level :
886
- zipped = [keys ]
887
- else :
888
- zipped = zip (* keys )
889
-
890
- for hlevel , level in zip (zipped , levels ):
911
+ for hlevel , level in zip (zipped , new_levels [:- 1 ]):
891
912
mapped = level .get_indexer (hlevel )
892
913
labels .append (np .repeat (mapped , n ))
893
914
0 commit comments