@@ -3432,11 +3432,17 @@ def rename(index):
             return ("level_{}".format(index),)

         if level is None:
-            new_index_map = [
-                (column, name if name is not None else rename(i))
-                for i, (column, name) in enumerate(self._internal.index_map.items())
+            new_column_labels = [
+                name if name is not None else rename(i)
+                for i, name in enumerate(self._internal.index_names)
             ]
-            index_map = {}  # type: Dict
+            new_data_spark_columns = [
+                scol.alias(name_like_string(label))
+                for scol, label in zip(self._internal.index_spark_columns, new_column_labels)
+            ]
+
+            index_spark_column_names = []
+            index_names = []
         else:
             if is_list_like(level):
                 level = list(level)
@@ -3478,35 +3484,29 @@ def rename(index):
                 raise ValueError("Level should be all int or all string.")
             idx.sort()

-            new_index_map = []
-            index_map_items = list(self._internal.index_map.items())
-            new_index_map_items = index_map_items.copy()
-            for i in idx:
-                info = index_map_items[i]
-                index_column, index_name = info
-                new_index_map.append(
-                    (index_column, index_name if index_name is not None else rename(i))
-                )
-                new_index_map_items.remove(info)
+            new_column_labels = []
+            new_data_spark_columns = []

-            index_map = OrderedDict(new_index_map_items)
+            index_spark_column_names = self._internal.index_spark_column_names.copy()
+            index_spark_columns = self._internal.index_spark_columns.copy()
+            index_names = self._internal.index_names.copy()

-        if drop:
-            new_index_map = []
+            for i in idx[::-1]:
+                index_spark_column_names.pop(i)

-        for _, name in new_index_map:
-            if name in self._internal.column_labels:
-                raise ValueError("cannot insert {}, already exists".format(name_like_string(name)))
+                name = index_names.pop(i)
+                new_column_labels.insert(0, name if name is not None else rename(i))

-        sdf = self._internal.spark_frame
-        new_data_scols = [
-            scol_for(sdf, column).alias(name_like_string(name)) for column, name in new_index_map
-        ]
+                scol = index_spark_columns.pop(i)
+                new_data_spark_columns.insert(0, scol.alias(name_like_string(name)))

-        index_scols = [scol_for(sdf, column) for column in index_map]
-        sdf = sdf.select(
-            index_scols + new_data_scols + self._internal.data_spark_columns + list(HIDDEN_COLUMNS)
-        )
+        if drop:
+            new_data_spark_columns = []
+            new_column_labels = []
+
+        for label in new_column_labels:
+            if label in self._internal.column_labels:
+                raise ValueError("cannot insert {}, already exists".format(name_like_string(label)))

         if self._internal.column_labels_level > 1:
             column_depth = len(self._internal.column_labels[0])
@@ -3516,28 +3516,22 @@ def rename(index):
                         column_depth, col_level + 1
                     )
                 )
-            if any(col_level + len(name) > column_depth for _, name in new_index_map):
+            if any(col_level + len(label) > column_depth for label in new_column_labels):
                 raise ValueError("Item must have length equal to number of levels.")
-            column_labels = [
+            new_column_labels = [
                 tuple(
                     ([col_fill] * col_level)
-                    + list(name)
-                    + ([col_fill] * (column_depth - (len(name) + col_level)))
+                    + list(label)
+                    + ([col_fill] * (column_depth - (len(label) + col_level)))
                 )
-                for _, name in new_index_map
-            ] + self._internal.column_labels
-        else:
-            column_labels = [name for _, name in new_index_map] + self._internal.column_labels
+                for label in new_column_labels
+            ]

         internal = self._internal.copy(
-            spark_frame=sdf,
-            index_spark_column_names=list(index_map.keys()),
-            index_names=list(index_map.values()),
-            column_labels=column_labels,
-            data_spark_columns=(
-                [scol_for(sdf, name_like_string(name)) for _, name in new_index_map]
-                + [scol_for(sdf, col) for col in self._internal.data_spark_column_names]
-            ),
+            index_spark_column_names=index_spark_column_names,
+            index_names=index_names,
+            column_labels=new_column_labels + self._internal.column_labels,
+            data_spark_columns=new_data_spark_columns + self._internal.data_spark_columns,
         )

         if inplace:
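
Side note on the reset_index hunks above: the refactor replaces the single index_map OrderedDict with parallel structures (index_spark_column_names, index_names, index_spark_columns) that are popped in lockstep. A minimal sketch of that pop/insert bookkeeping on plain Python lists; the values here are illustrative, not from the PR:

    index_names = [("a",), None, ("c",)]  # one entry per index level
    idx = [0, 2]                          # level positions being reset, already sorted

    new_column_labels = []
    remaining_names = index_names.copy()

    # Popping in reverse keeps the earlier positions in `idx` valid.
    for i in idx[::-1]:
        name = remaining_names.pop(i)
        new_column_labels.insert(0, name if name is not None else ("level_{}".format(i),))

    print(new_column_labels)  # [('a',), ('c',)] -> become data columns
    print(remaining_names)    # [None]           -> stays in the index

Iterating idx in reverse while inserting at position 0 preserves the original level order even though items are removed by position, which is why the new code uses idx[::-1].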
@@ -5957,11 +5951,10 @@ def droplevel(self, level, axis=0) -> "DataFrame":
             if not isinstance(level, (tuple, list)):  # huh?
                 level = [level]

-            spark_frame = self._internal.spark_frame
-            index_map = self._internal.index_map.copy()
             index_names = self.index.names
-            nlevels = self.index.nlevels
-            int_levels = list()
+            nlevels = self._internal.index_level
+
+            int_level = set()
             for n in level:
                 if isinstance(n, int):
                     if n < 0:
@@ -5981,22 +5974,27 @@ def droplevel(self, level, axis=0) -> "DataFrame":
                     if n not in index_names:
                         raise KeyError("Level {} not found".format(n))
                     n = index_names.index(n)
-                int_levels.append(n)
+                int_level.add(n)

-            if len(int_levels) >= nlevels:
+            if len(level) >= nlevels:
                 raise ValueError(
                     "Cannot remove {} levels from an index with {} levels: "
-                    "at least one level must be left.".format(len(int_levels), nlevels)
+                    "at least one level must be left.".format(len(level), nlevels)
                 )

-            for int_level in int_levels:
-                index_spark_column = self._internal.index_spark_column_names[int_level]
-                spark_frame = spark_frame.drop(index_spark_column)
-                index_map.pop(index_spark_column)
+            index_spark_column_names, index_names = zip(
+                *[
+                    item
+                    for i, item in enumerate(
+                        zip(self._internal.index_spark_column_names, self._internal.index_names)
+                    )
+                    if i not in int_level
+                ]
+            )
+
             internal = self._internal.copy(
-                spark_frame=spark_frame,
-                index_spark_column_names=list(index_map.keys()),
-                index_names=list(index_map.values()),
+                index_spark_column_names=list(index_spark_column_names),
+                index_names=list(index_names),
             )
             return DataFrame(internal)
         else:
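
Side note on the droplevel hunk: the zip(*[...]) construct above filters two parallel sequences by position while keeping them aligned. A standalone sketch with made-up values, not taken from the PR:

    column_names = ["__index_level_0__", "__index_level_1__", "__index_level_2__"]
    names = [("x",), ("y",), ("z",)]
    int_level = {1}  # level positions to drop

    kept_columns, kept_names = zip(
        *[pair for i, pair in enumerate(zip(column_names, names)) if i not in int_level]
    )
    print(list(kept_columns))  # ['__index_level_0__', '__index_level_2__']
    print(list(kept_names))    # [('x',), ('z',)]

If every level were dropped, the unpacking would raise ValueError (zip(*[]) yields nothing), but the len(level) >= nlevels check above guarantees at least one pair survives.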
@@ -6845,33 +6843,38 @@ def to_list(os: Optional[Union[Any, List[Any], Tuple, List[Tuple]]]) -> List[Tuple]:
             if right_index:
                 if how in ("inner", "left"):
                     exprs.extend(left_index_scols)
-                    index_map = self._internal.index_map
+                    index_spark_column_names = self._internal.index_spark_column_names
+                    index_names = self._internal.index_names
                 elif how == "right":
                     exprs.extend(right_index_scols)
-                    index_map = right._internal.index_map
+                    index_spark_column_names = right._internal.index_spark_column_names
+                    index_names = right._internal.index_names
                 else:
-                    index_map = OrderedDict()
-                    for (col, name), left_scol, right_scol in zip(
-                        self._internal.index_map.items(), left_index_scols, right_index_scols
+                    index_spark_column_names = self._internal.index_spark_column_names
+                    index_names = self._internal.index_names
+                    for col, left_scol, right_scol in zip(
+                        index_spark_column_names, left_index_scols, right_index_scols
                     ):
                         scol = F.when(left_scol.isNotNull(), left_scol).otherwise(right_scol)
                         exprs.append(scol.alias(col))
-                        index_map[col] = name
             else:
                 exprs.extend(right_index_scols)
-                index_map = right._internal.index_map
+                index_spark_column_names = right._internal.index_spark_column_names
+                index_names = right._internal.index_names
         elif right_index:
             exprs.extend(left_index_scols)
-            index_map = self._internal.index_map
+            index_spark_column_names = self._internal.index_spark_column_names
+            index_names = self._internal.index_names
         else:
-            index_map = OrderedDict()
+            index_spark_column_names = None
+            index_names = None

         selected_columns = joined_table.select(*exprs)

         internal = InternalFrame(
             spark_frame=selected_columns,
-            index_spark_column_names=list(index_map.keys()) if index_map else None,
-            index_names=list(index_map.values()) if index_map else None,
+            index_spark_column_names=index_spark_column_names,
+            index_names=index_names,
             column_labels=column_labels,
             data_spark_columns=[scol_for(selected_columns, col) for col in data_columns],
         )
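
Side note on the merge hunk: in the full-outer branch each pair of index columns is coalesced with F.when(...).otherwise(...), taking the left value when present and the right otherwise. A self-contained PySpark sketch of that expression; the session and toy frames here are illustrative:

    from pyspark.sql import SparkSession, functions as F

    spark = SparkSession.builder.master("local[1]").getOrCreate()
    left = spark.createDataFrame([(1, "a"), (2, "b")], ["idx", "lval"])
    right = spark.createDataFrame([(2, "x"), (3, "y")], ["idx", "rval"])

    joined = left.alias("l").join(right.alias("r"), F.col("l.idx") == F.col("r.idx"), "outer")

    # Same shape as: F.when(left_scol.isNotNull(), left_scol).otherwise(right_scol)
    idx = F.when(F.col("l.idx").isNotNull(), F.col("l.idx")).otherwise(F.col("r.idx"))
    joined.select(idx.alias("idx"), "lval", "rval").show()
    # rows for idx 1, 2, 3, with nulls where one side had no match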