@@ -181,8 +181,8 @@ def __init__(self, left, right, how='inner', on=None,
181
181
elif isinstance (self .indicator , bool ):
182
182
self .indicator_name = '_merge' if self .indicator else None
183
183
else :
184
- raise ValueError ('indicator option can only accept boolean or string arguments' )
185
-
184
+ raise ValueError (
185
+ 'indicator option can only accept boolean or string arguments' )
186
186
187
187
# note this function has side effects
188
188
(self .left_join_keys ,
@@ -191,7 +191,8 @@ def __init__(self, left, right, how='inner', on=None,
191
191
192
192
def get_result (self ):
193
193
if self .indicator :
194
- self .left , self .right = self ._indicator_pre_merge (self .left , self .right )
194
+ self .left , self .right = self ._indicator_pre_merge (
195
+ self .left , self .right )
195
196
196
197
join_index , left_indexer , right_indexer = self ._get_join_info ()
197
198
@@ -225,9 +226,11 @@ def _indicator_pre_merge(self, left, right):
225
226
226
227
for i in ['_left_indicator' , '_right_indicator' ]:
227
228
if i in columns :
228
- raise ValueError ("Cannot use `indicator=True` option when data contains a column named {}" .format (i ))
229
+ raise ValueError ("Cannot use `indicator=True` option when "
230
+ "data contains a column named {}" .format (i ))
229
231
if self .indicator_name in columns :
230
- raise ValueError ("Cannot use name of an existing column for indicator column" )
232
+ raise ValueError (
233
+ "Cannot use name of an existing column for indicator column" )
231
234
232
235
left = left .copy ()
233
236
right = right .copy ()
@@ -245,11 +248,15 @@ def _indicator_post_merge(self, result):
245
248
result ['_left_indicator' ] = result ['_left_indicator' ].fillna (0 )
246
249
result ['_right_indicator' ] = result ['_right_indicator' ].fillna (0 )
247
250
248
- result [self .indicator_name ] = Categorical ((result ['_left_indicator' ] + result ['_right_indicator' ]), categories = [1 ,2 ,3 ])
249
- result [self .indicator_name ] = result [self .indicator_name ].cat .rename_categories (['left_only' , 'right_only' , 'both' ])
250
-
251
- result = result .drop (labels = ['_left_indicator' , '_right_indicator' ], axis = 1 )
251
+ result [self .indicator_name ] = Categorical ((result ['_left_indicator' ] +
252
+ result ['_right_indicator' ]),
253
+ categories = [1 , 2 , 3 ])
254
+ result [self .indicator_name ] = (
255
+ result [self .indicator_name ]
256
+ .cat .rename_categories (['left_only' , 'right_only' , 'both' ]))
252
257
258
+ result = result .drop (labels = ['_left_indicator' , '_right_indicator' ],
259
+ axis = 1 )
253
260
return result
254
261
255
262
def _maybe_add_join_keys (self , result , left_indexer , right_indexer ):
@@ -274,8 +281,9 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
274
281
continue
275
282
276
283
right_na_indexer = right_indexer .take (na_indexer )
277
- result .iloc [na_indexer ,key_indexer ] = com .take_1d (self .right_join_keys [i ],
278
- right_na_indexer )
284
+ result .iloc [na_indexer , key_indexer ] = (
285
+ com .take_1d (self .right_join_keys [i ],
286
+ right_na_indexer ))
279
287
elif name in self .right :
280
288
if len (self .right ) == 0 :
281
289
continue
@@ -285,8 +293,9 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
285
293
continue
286
294
287
295
left_na_indexer = left_indexer .take (na_indexer )
288
- result .iloc [na_indexer ,key_indexer ] = com .take_1d (self .left_join_keys [i ],
289
- left_na_indexer )
296
+ result .iloc [na_indexer , key_indexer ] = (
297
+ com .take_1d (self .left_join_keys [i ],
298
+ left_na_indexer ))
290
299
elif left_indexer is not None \
291
300
and isinstance (self .left_join_keys [i ], np .ndarray ):
292
301
@@ -384,8 +393,10 @@ def _get_merge_keys(self):
384
393
left_drop = []
385
394
left , right = self .left , self .right
386
395
387
- is_lkey = lambda x : isinstance (x , (np .ndarray , ABCSeries )) and len (x ) == len (left )
388
- is_rkey = lambda x : isinstance (x , (np .ndarray , ABCSeries )) and len (x ) == len (right )
396
+ is_lkey = lambda x : isinstance (
397
+ x , (np .ndarray , ABCSeries )) and len (x ) == len (left )
398
+ is_rkey = lambda x : isinstance (
399
+ x , (np .ndarray , ABCSeries )) and len (x ) == len (right )
389
400
390
401
# ugh, spaghetti re #733
391
402
if _any (self .left_on ) and _any (self .right_on ):
@@ -507,13 +518,13 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
507
518
from functools import partial
508
519
509
520
assert len (left_keys ) == len (right_keys ), \
510
- 'left_key and right_keys must be the same length'
521
+ 'left_key and right_keys must be the same length'
511
522
512
523
# bind `sort` arg. of _factorize_keys
513
524
fkeys = partial (_factorize_keys , sort = sort )
514
525
515
526
# get left & right join labels and num. of levels at each location
516
- llab , rlab , shape = map (list , zip ( * map (fkeys , left_keys , right_keys )))
527
+ llab , rlab , shape = map (list , zip (* map (fkeys , left_keys , right_keys )))
517
528
518
529
# get flat i8 keys from label lists
519
530
lkey , rkey = _get_join_keys (llab , rlab , shape , sort )
@@ -524,7 +535,7 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'):
524
535
lkey , rkey , count = fkeys (lkey , rkey )
525
536
526
537
# preserve left frame order if how == 'left' and sort == False
527
- kwargs = {'sort' :sort } if how == 'left' else {}
538
+ kwargs = {'sort' : sort } if how == 'left' else {}
528
539
join_func = _join_functions [how ]
529
540
return join_func (lkey , rkey , count , ** kwargs )
530
541
@@ -563,8 +574,10 @@ def get_result(self):
563
574
left_join_indexer = left_indexer
564
575
right_join_indexer = right_indexer
565
576
566
- lindexers = {1 : left_join_indexer } if left_join_indexer is not None else {}
567
- rindexers = {1 : right_join_indexer } if right_join_indexer is not None else {}
577
+ lindexers = {
578
+ 1 : left_join_indexer } if left_join_indexer is not None else {}
579
+ rindexers = {
580
+ 1 : right_join_indexer } if right_join_indexer is not None else {}
568
581
569
582
result_data = concatenate_block_managers (
570
583
[(ldata , lindexers ), (rdata , rindexers )],
@@ -586,7 +599,7 @@ def _get_multiindex_indexer(join_keys, index, sort):
586
599
fkeys = partial (_factorize_keys , sort = sort )
587
600
588
601
# left & right join labels and num. of levels at each location
589
- rlab , llab , shape = map (list , zip ( * map (fkeys , index .levels , join_keys )))
602
+ rlab , llab , shape = map (list , zip (* map (fkeys , index .levels , join_keys )))
590
603
if sort :
591
604
rlab = list (map (np .take , rlab , index .labels ))
592
605
else :
@@ -751,12 +764,13 @@ def _get_join_keys(llab, rlab, shape, sort):
751
764
752
765
return _get_join_keys (llab , rlab , shape , sort )
753
766
754
- #- ---------------------------------------------------------------------
767
+ # ---------------------------------------------------------------------
755
768
# Concatenate DataFrame objects
756
769
757
770
758
771
def concat (objs , axis = 0 , join = 'outer' , join_axes = None , ignore_index = False ,
759
- keys = None , levels = None , names = None , verify_integrity = False , copy = True ):
772
+ keys = None , levels = None , names = None , verify_integrity = False ,
773
+ copy = True ):
760
774
"""
761
775
Concatenate pandas objects along a particular axis with optional set logic
762
776
along the other axes. Can also add a layer of hierarchical indexing on the
@@ -885,10 +899,11 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
885
899
else :
886
900
# filter out the empties
887
901
# if we have no multi-index possibilities
888
- df = DataFrame ([ obj .shape for obj in objs ]).sum (1 )
889
- non_empties = df [df != 0 ]
890
- if len (non_empties ) and (keys is None and names is None and levels is None and join_axes is None ):
891
- objs = [ objs [i ] for i in non_empties .index ]
902
+ df = DataFrame ([obj .shape for obj in objs ]).sum (1 )
903
+ non_empties = df [df != 0 ]
904
+ if (len (non_empties ) and (keys is None and names is None and
905
+ levels is None and join_axes is None )):
906
+ objs = [objs [i ] for i in non_empties .index ]
892
907
sample = objs [0 ]
893
908
894
909
if sample is None :
@@ -917,12 +932,12 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
917
932
if ndim == max_ndim :
918
933
pass
919
934
920
- elif ndim != max_ndim - 1 :
935
+ elif ndim != max_ndim - 1 :
921
936
raise ValueError ("cannot concatenate unaligned mixed "
922
937
"dimensional NDFrame objects" )
923
938
924
939
else :
925
- name = getattr (obj ,'name' ,None )
940
+ name = getattr (obj , 'name' , None )
926
941
if ignore_index or name is None :
927
942
name = current_column
928
943
current_column += 1
@@ -931,7 +946,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None,
931
946
# to line up
932
947
if self ._is_frame and axis == 1 :
933
948
name = 0
934
- obj = sample ._constructor ({ name : obj })
949
+ obj = sample ._constructor ({name : obj })
935
950
936
951
self .objs .append (obj )
937
952
@@ -957,17 +972,23 @@ def get_result(self):
957
972
if self .axis == 0 :
958
973
new_data = com ._concat_compat ([x ._values for x in self .objs ])
959
974
name = com ._consensus_name_attr (self .objs )
960
- return Series (new_data , index = self .new_axes [0 ], name = name ).__finalize__ (self , method = 'concat' )
975
+ return (Series (new_data , index = self .new_axes [0 ], name = name )
976
+ .__finalize__ (self , method = 'concat' ))
961
977
962
978
# combine as columns in a frame
963
979
else :
964
980
data = dict (zip (range (len (self .objs )), self .objs ))
965
981
index , columns = self .new_axes
966
982
tmpdf = DataFrame (data , index = index )
967
- # checks if the column variable already stores valid column names (because set via the 'key' argument
968
- # in the 'concat' function call. If that's not the case, use the series names as column names
969
- if columns .equals (Index (np .arange (len (self .objs )))) and not self .ignore_index :
970
- columns = np .array ([ data [i ].name for i in range (len (data )) ], dtype = 'object' )
983
+ # checks if the column variable already stores valid column
984
+ # names (because set via the 'key' argument in the 'concat'
985
+ # function call. If that's not the case, use the series names
986
+ # as column names
987
+ if (columns .equals (Index (np .arange (len (self .objs )))) and
988
+ not self .ignore_index ):
989
+ columns = np .array ([data [i ].name
990
+ for i in range (len (data ))],
991
+ dtype = 'object' )
971
992
indexer = isnull (columns )
972
993
if indexer .any ():
973
994
columns [indexer ] = np .arange (len (indexer [indexer ]))
@@ -992,11 +1013,13 @@ def get_result(self):
992
1013
mgrs_indexers .append ((obj ._data , indexers ))
993
1014
994
1015
new_data = concatenate_block_managers (
995
- mgrs_indexers , self .new_axes , concat_axis = self .axis , copy = self .copy )
1016
+ mgrs_indexers , self .new_axes ,
1017
+ concat_axis = self .axis , copy = self .copy )
996
1018
if not self .copy :
997
1019
new_data ._consolidate_inplace ()
998
1020
999
- return self .objs [0 ]._from_axes (new_data , self .new_axes ).__finalize__ (self , method = 'concat' )
1021
+ return (self .objs [0 ]._from_axes (new_data , self .new_axes )
1022
+ .__finalize__ (self , method = 'concat' ))
1000
1023
1001
1024
def _get_result_dim (self ):
1002
1025
if self ._is_series and self .axis == 1 :
@@ -1091,7 +1114,7 @@ def _maybe_check_integrity(self, concat_index):
1091
1114
if not concat_index .is_unique :
1092
1115
overlap = concat_index .get_duplicates ()
1093
1116
raise ValueError ('Indexes have overlapping values: %s'
1094
- % str (overlap ))
1117
+ % str (overlap ))
1095
1118
1096
1119
1097
1120
def _concat_indexes (indexes ):
@@ -1106,7 +1129,8 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
1106
1129
names = [None ] * len (zipped )
1107
1130
1108
1131
if levels is None :
1109
- levels = [Categorical .from_array (zp , ordered = True ).categories for zp in zipped ]
1132
+ levels = [Categorical .from_array (
1133
+ zp , ordered = True ).categories for zp in zipped ]
1110
1134
else :
1111
1135
levels = [_ensure_index (x ) for x in levels ]
1112
1136
else :
@@ -1152,7 +1176,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
1152
1176
names = list (names )
1153
1177
else :
1154
1178
# make sure that all of the passed indices have the same nlevels
1155
- if not len (set ([ i .nlevels for i in indexes ])) == 1 :
1179
+ if not len (set ([i .nlevels for i in indexes ])) == 1 :
1156
1180
raise AssertionError ("Cannot concat indices that do"
1157
1181
" not have the same number of levels" )
1158
1182
@@ -1201,7 +1225,8 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None):
1201
1225
1202
1226
1203
1227
def _should_fill (lname , rname ):
1204
- if not isinstance (lname , compat .string_types ) or not isinstance (rname , compat .string_types ):
1228
+ if (not isinstance (lname , compat .string_types ) or
1229
+ not isinstance (rname , compat .string_types )):
1205
1230
return True
1206
1231
return lname == rname
1207
1232
0 commit comments