@@ -925,27 +925,99 @@ def _reorder_for_extension_array_stack(
925
925
def stack_v3 (frame : DataFrame , level : list [int ]) -> Series | DataFrame :
926
926
if frame .columns .nunique () != len (frame .columns ):
927
927
raise ValueError ("Columns with duplicate values are not supported in stack" )
928
-
929
- # If we need to drop `level` from columns, it needs to be in descending order
930
928
set_levels = set (level )
931
- drop_levnums = sorted (level , reverse = True )
932
929
stack_cols = frame .columns ._drop_level_numbers (
933
930
[k for k in range (frame .columns .nlevels - 1 , - 1 , - 1 ) if k not in set_levels ]
934
931
)
932
+
933
+ result = stack_reshape (frame , level , set_levels , stack_cols )
934
+
935
+ # Construct the correct MultiIndex by combining the frame's index and
936
+ # stacked columns.
937
+ ratio = 0 if frame .empty else len (result ) // len (frame )
938
+
939
+ index_levels : list | FrozenList
940
+ if isinstance (frame .index , MultiIndex ):
941
+ index_levels = frame .index .levels
942
+ index_codes = list (np .tile (frame .index .codes , (1 , ratio )))
943
+ else :
944
+ codes , uniques = factorize (frame .index , use_na_sentinel = False )
945
+ index_levels = [uniques ]
946
+ index_codes = list (np .tile (codes , (1 , ratio )))
947
+
935
948
if len (level ) > 1 :
936
949
# Arrange columns in the order we want to take them, e.g. level=[2, 0, 1]
937
950
sorter = np .argsort (level )
938
951
assert isinstance (stack_cols , MultiIndex )
939
952
ordered_stack_cols = stack_cols ._reorder_ilevels (sorter )
940
953
else :
941
954
ordered_stack_cols = stack_cols
942
-
943
- stack_cols_unique = stack_cols .unique ()
944
955
ordered_stack_cols_unique = ordered_stack_cols .unique ()
956
+ if isinstance (ordered_stack_cols , MultiIndex ):
957
+ column_levels = ordered_stack_cols .levels
958
+ column_codes = ordered_stack_cols .drop_duplicates ().codes
959
+ else :
960
+ column_levels = [ordered_stack_cols_unique ]
961
+ column_codes = [factorize (ordered_stack_cols_unique , use_na_sentinel = False )[0 ]]
962
+
963
+ # error: Incompatible types in assignment (expression has type "list[ndarray[Any,
964
+ # dtype[Any]]]", variable has type "FrozenList")
965
+ column_codes = [np .repeat (codes , len (frame )) for codes in column_codes ] # type: ignore[assignment]
966
+ result .index = MultiIndex (
967
+ levels = index_levels + column_levels ,
968
+ codes = index_codes + column_codes ,
969
+ names = frame .index .names + list (ordered_stack_cols .names ),
970
+ verify_integrity = False ,
971
+ )
972
+
973
+ # sort result, but faster than calling sort_index since we know the order we need
974
+ len_df = len (frame )
975
+ n_uniques = len (ordered_stack_cols_unique )
976
+ indexer = np .arange (n_uniques )
977
+ idxs = np .tile (len_df * indexer , len_df ) + np .repeat (np .arange (len_df ), n_uniques )
978
+ result = result .take (idxs )
979
+
980
+ # Reshape/rename if needed and dropna
981
+ if result .ndim == 2 and frame .columns .nlevels == len (level ):
982
+ if len (result .columns ) == 0 :
983
+ result = Series (index = result .index )
984
+ else :
985
+ result = result .iloc [:, 0 ]
986
+ if result .ndim == 1 :
987
+ result .name = None
988
+
989
+ return result
990
+
991
+
992
+ def stack_reshape (
993
+ frame : DataFrame , level : list [int ], set_levels : set [int ], stack_cols : Index
994
+ ) -> Series | DataFrame :
995
+ """Reshape the data of a frame for stack.
996
+
997
+ This function takes care of most of the work that stack needs to do. Caller
998
+ will sort the result once the appropriate index is set.
999
+
1000
+ Parameters
1001
+ ----------
1002
+ frame: DataFrame
1003
+ DataFrame that is to be stacked.
1004
+ level: list of ints.
1005
+ Levels of the columns to stack.
1006
+ set_levels: set of ints.
1007
+ Same as level, but as a set.
1008
+ stack_cols: Index.
1009
+ Columns of the result when the DataFrame is stacked.
1010
+
1011
+ Returns
1012
+ -------
1013
+ The data behind the stacked DataFrame.
1014
+ """
1015
+ # If we need to drop `level` from columns, it needs to be in descending order
1016
+ drop_levnums = sorted (level , reverse = True )
945
1017
946
1018
# Grab data for each unique index to be stacked
947
1019
buf = []
948
- for idx in stack_cols_unique :
1020
+ for idx in stack_cols . unique () :
949
1021
if len (frame .columns ) == 1 :
950
1022
data = frame .copy ()
951
1023
else :
@@ -972,10 +1044,8 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
972
1044
data .columns = RangeIndex (len (data .columns ))
973
1045
buf .append (data )
974
1046
975
- result : Series | DataFrame
976
1047
if len (buf ) > 0 and not frame .empty :
977
1048
result = concat (buf , ignore_index = True )
978
- ratio = len (result ) // len (frame )
979
1049
else :
980
1050
# input is empty
981
1051
if len (level ) < frame .columns .nlevels :
@@ -984,54 +1054,11 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
984
1054
else :
985
1055
new_columns = [0 ]
986
1056
result = DataFrame (columns = new_columns , dtype = frame ._values .dtype )
987
- ratio = 0
988
1057
989
1058
if len (level ) < frame .columns .nlevels :
990
1059
# concat column order may be different from dropping the levels
991
1060
desired_columns = frame .columns ._drop_level_numbers (drop_levnums ).unique ()
992
1061
if not result .columns .equals (desired_columns ):
993
1062
result = result [desired_columns ]
994
1063
995
- # Construct the correct MultiIndex by combining the frame's index and
996
- # stacked columns.
997
- index_levels : list | FrozenList
998
- if isinstance (frame .index , MultiIndex ):
999
- index_levels = frame .index .levels
1000
- index_codes = list (np .tile (frame .index .codes , (1 , ratio )))
1001
- else :
1002
- codes , uniques = factorize (frame .index , use_na_sentinel = False )
1003
- index_levels = [uniques ]
1004
- index_codes = list (np .tile (codes , (1 , ratio )))
1005
- if isinstance (ordered_stack_cols , MultiIndex ):
1006
- column_levels = ordered_stack_cols .levels
1007
- column_codes = ordered_stack_cols .drop_duplicates ().codes
1008
- else :
1009
- column_levels = [ordered_stack_cols .unique ()]
1010
- column_codes = [factorize (ordered_stack_cols_unique , use_na_sentinel = False )[0 ]]
1011
- # error: Incompatible types in assignment (expression has type "list[ndarray[Any,
1012
- # dtype[Any]]]", variable has type "FrozenList")
1013
- column_codes = [np .repeat (codes , len (frame )) for codes in column_codes ] # type: ignore[assignment]
1014
- result .index = MultiIndex (
1015
- levels = index_levels + column_levels ,
1016
- codes = index_codes + column_codes ,
1017
- names = frame .index .names + list (ordered_stack_cols .names ),
1018
- verify_integrity = False ,
1019
- )
1020
-
1021
- # sort result, but faster than calling sort_index since we know the order we need
1022
- len_df = len (frame )
1023
- n_uniques = len (ordered_stack_cols_unique )
1024
- indexer = np .arange (n_uniques )
1025
- idxs = np .tile (len_df * indexer , len_df ) + np .repeat (np .arange (len_df ), n_uniques )
1026
- result = result .take (idxs )
1027
-
1028
- # Reshape/rename if needed and dropna
1029
- if result .ndim == 2 and frame .columns .nlevels == len (level ):
1030
- if len (result .columns ) == 0 :
1031
- result = Series (index = result .index )
1032
- else :
1033
- result = result .iloc [:, 0 ]
1034
- if result .ndim == 1 :
1035
- result .name = None
1036
-
1037
1064
return result
0 commit comments