@@ -751,16 +751,16 @@ def _convert_level_number(level_num, columns):
751
751
return result
752
752
753
753
754
- def from_dummies (data , columns = None , prefix_sep = "_" , dtype = "category" , fill_first = None ):
754
+ def from_dummies (data , prefix = None , prefix_sep = "_" , dtype = "category" , fill_first = None ):
755
755
"""
756
756
The inverse transformation of ``pandas.get_dummies``.
757
757
758
758
Parameters
759
759
----------
760
760
data : DataFrame
761
- columns : list-like, default None
762
- Column names in the DataFrame to be decoded.
763
- If `columns` is None then all the columns will be converted.
761
+ prefix : list-like, default None
762
+ Prefixes of the columns in the DataFrame to be decoded.
763
+ If `prefix` is None then all the columns will be decoded.
764
764
prefix_sep : str, default '_'
765
765
Separator between original column name and dummy variable
766
766
dtype : dtype, default 'category'
@@ -792,7 +792,7 @@ def from_dummies(data, columns=None, prefix_sep="_", dtype="category", fill_firs
792
792
793
793
We can recover the original dataframe using `from_dummies`:
794
794
795
- >>> pd.from_dummies(df, columns=['animal'])
795
+ >>> pd.from_dummies(df, prefix=['animal'])
796
796
other_col animal
797
797
0 a zebra
798
798
1 b lemur
@@ -811,7 +811,7 @@ def from_dummies(data, columns=None, prefix_sep="_", dtype="category", fill_firs
811
811
We can still recover the original dataframe, by using the argument
812
812
`fill_first`:
813
813
814
- >>> pd.from_dummies(df, columns=["animal"], fill_first=["zebra"])
814
+ >>> pd.from_dummies(df, prefix=["animal"], fill_first=["zebra"])
815
815
other_col animal
816
816
0 a zebra
817
817
1 b lemur
@@ -820,15 +820,13 @@ def from_dummies(data, columns=None, prefix_sep="_", dtype="category", fill_firs
820
820
if dtype is None :
821
821
dtype = "category"
822
822
823
- if columns is None :
823
+ if prefix is None :
824
824
data_to_decode = data .copy ()
825
- columns = data .columns .tolist ()
826
- columns = list (
827
- {i .split (prefix_sep )[0 ] for i in data .columns if prefix_sep in i }
828
- )
825
+ prefix = data .columns .tolist ()
826
+ prefix = list ({i .split (prefix_sep )[0 ] for i in data .columns if prefix_sep in i })
829
827
830
828
data_to_decode = data [
831
- [i for i in data .columns for c in columns if i .startswith (c + prefix_sep )]
829
+ [i for i in data .columns for p in prefix if i .startswith (p + prefix_sep )]
832
830
]
833
831
834
832
# Check each row sums to 1 or 0
@@ -839,30 +837,30 @@ def from_dummies(data, columns=None, prefix_sep="_", dtype="category", fill_firs
839
837
)
840
838
841
839
if fill_first is None :
842
- fill_first = [None ] * len (columns )
840
+ fill_first = [None ] * len (prefix )
843
841
elif isinstance (fill_first , str ):
844
842
fill_first = itertools .cycle ([fill_first ])
845
843
elif isinstance (fill_first , dict ):
846
- fill_first = [fill_first [col ] for col in columns ]
844
+ fill_first = [fill_first [p ] for p in prefix ]
847
845
848
846
out = data .copy ()
849
- for column , fill_first_ in zip (columns , fill_first ):
847
+ for prefix_ , fill_first_ in zip (prefix , fill_first ):
850
848
cols , labels = [
851
849
[
852
850
i .replace (x , "" )
853
851
for i in data_to_decode .columns
854
- if column + prefix_sep in i
852
+ if prefix_ + prefix_sep in i
855
853
]
856
- for x in ["" , column + prefix_sep ]
854
+ for x in ["" , prefix_ + prefix_sep ]
857
855
]
858
856
if not cols :
859
857
continue
860
858
out = out .drop (cols , axis = 1 )
861
859
if fill_first_ :
862
- cols = [column + prefix_sep + fill_first_ ] + cols
860
+ cols = [prefix_ + prefix_sep + fill_first_ ] + cols
863
861
labels = [fill_first_ ] + labels
864
862
data [cols [0 ]] = (1 - data [cols [1 :]]).all (axis = 1 )
865
- out [column ] = Series (
863
+ out [prefix_ ] = Series (
866
864
np .array (labels )[np .argmax (data [cols ].to_numpy (), axis = 1 )], dtype = dtype
867
865
)
868
866
return out
0 commit comments