13
13
14
14
import pandas .core .algorithms as algos
15
15
from pandas .core .arrays import Categorical
16
- import pandas .core .common as com
17
- from pandas .core .indexes .api import (
18
- Index ,
19
- MultiIndex ,
20
- )
16
+ from pandas .core .indexes .api import MultiIndex
21
17
from pandas .core .reshape .concat import concat
22
18
from pandas .core .reshape .util import tile_compat
23
19
from pandas .core .shared_docs import _shared_docs
31
27
from pandas import DataFrame
32
28
33
29
30
def ensure_list_vars(arg_vars, variable: str, columns) -> list:
    """
    Normalize an ``id_vars``/``value_vars``-style argument into a list.

    Parameters
    ----------
    arg_vars : scalar, list-like or None
        The value to normalize.
    variable : str
        Name of the argument being normalized; used only in the error message.
    columns : object
        The columns ``arg_vars`` refers to. Only inspected to see whether
        it is a ``MultiIndex``.

    Returns
    -------
    list
        ``[]`` when ``arg_vars`` is None, ``[arg_vars]`` when it is not
        list-like, otherwise ``list(arg_vars)``.

    Raises
    ------
    ValueError
        If ``columns`` is a ``MultiIndex`` and ``arg_vars`` is list-like
        but not an actual ``list``.
    """
    if arg_vars is None:
        return []

    if not is_list_like(arg_vars):
        # A lone label is wrapped as-is rather than iterated over.
        return [arg_vars]

    columns_are_multi = isinstance(columns, MultiIndex)
    if columns_are_multi and not isinstance(arg_vars, list):
        # With MultiIndex columns, only a real list is accepted; any other
        # list-like (e.g. a tuple) is rejected.
        raise ValueError(
            f"{variable} must be a list of tuples when columns are a MultiIndex"
        )

    return list(arg_vars)
42
+
43
+
34
44
@Appender (_shared_docs ["melt" ] % {"caller" : "pd.melt(df, " , "other" : "DataFrame.melt" })
35
45
def melt (
36
46
frame : DataFrame ,
@@ -41,61 +51,35 @@ def melt(
41
51
col_level = None ,
42
52
ignore_index : bool = True ,
43
53
) -> DataFrame :
44
- # If multiindex, gather names of columns on all level for checking presence
45
- # of `id_vars` and `value_vars`
46
- if isinstance (frame .columns , MultiIndex ):
47
- cols = [x for c in frame .columns for x in c ]
48
- else :
49
- cols = list (frame .columns )
50
-
51
54
if value_name in frame .columns :
52
55
raise ValueError (
53
56
f"value_name ({ value_name } ) cannot match an element in "
54
57
"the DataFrame columns."
55
58
)
59
+ id_vars = ensure_list_vars (id_vars , "id_vars" , frame .columns )
60
+ value_vars_was_not_none = value_vars is not None
61
+ value_vars = ensure_list_vars (value_vars , "value_vars" , frame .columns )
56
62
57
- if id_vars is not None :
58
- if not is_list_like (id_vars ):
59
- id_vars = [id_vars ]
60
- elif isinstance (frame .columns , MultiIndex ) and not isinstance (id_vars , list ):
61
- raise ValueError (
62
- "id_vars must be a list of tuples when columns are a MultiIndex"
63
- )
64
- else :
65
- # Check that `id_vars` are in frame
66
- id_vars = list (id_vars )
67
- missing = Index (com .flatten (id_vars )).difference (cols )
68
- if not missing .empty :
69
- raise KeyError (
70
- "The following 'id_vars' are not present "
71
- f"in the DataFrame: { list (missing )} "
72
- )
73
- else :
74
- id_vars = []
75
-
76
- if value_vars is not None :
77
- if not is_list_like (value_vars ):
78
- value_vars = [value_vars ]
79
- elif isinstance (frame .columns , MultiIndex ) and not isinstance (value_vars , list ):
80
- raise ValueError (
81
- "value_vars must be a list of tuples when columns are a MultiIndex"
82
- )
83
- else :
84
- value_vars = list (value_vars )
85
- # Check that `value_vars` are in frame
86
- missing = Index (com .flatten (value_vars )).difference (cols )
87
- if not missing .empty :
88
- raise KeyError (
89
- "The following 'value_vars' are not present in "
90
- f"the DataFrame: { list (missing )} "
91
- )
63
+ if id_vars or value_vars :
92
64
if col_level is not None :
93
- idx = frame .columns .get_level_values (col_level ).get_indexer (
94
- id_vars + value_vars
65
+ level = frame .columns .get_level_values (col_level )
66
+ else :
67
+ level = frame .columns
68
+ labels = id_vars + value_vars
69
+ idx = level .get_indexer_for (labels )
70
+ missing = idx == - 1
71
+ if missing .any ():
72
+ missing_labels = [
73
+ lab for lab , not_found in zip (labels , missing ) if not_found
74
+ ]
75
+ raise KeyError (
76
+ "The following id_vars or value_vars are not present in "
77
+ f"the DataFrame: { missing_labels } "
95
78
)
79
+ if value_vars_was_not_none :
80
+ frame = frame .iloc [:, algos .unique (idx )]
96
81
else :
97
- idx = algos .unique (frame .columns .get_indexer_for (id_vars + value_vars ))
98
- frame = frame .iloc [:, idx ]
82
+ frame = frame .copy ()
99
83
else :
100
84
frame = frame .copy ()
101
85
@@ -113,24 +97,26 @@ def melt(
113
97
var_name = [
114
98
frame .columns .name if frame .columns .name is not None else "variable"
115
99
]
116
- if isinstance (var_name , str ):
100
+ elif is_list_like (var_name ):
101
+ raise ValueError (f"{ var_name = } must be a scalar." )
102
+ else :
117
103
var_name = [var_name ]
118
104
119
- N , K = frame .shape
120
- K -= len (id_vars )
105
+ num_rows , K = frame .shape
106
+ num_cols_adjusted = K - len (id_vars )
121
107
122
108
mdata : dict [Hashable , AnyArrayLike ] = {}
123
109
for col in id_vars :
124
110
id_data = frame .pop (col )
125
111
if not isinstance (id_data .dtype , np .dtype ):
126
112
# i.e. ExtensionDtype
127
- if K > 0 :
128
- mdata [col ] = concat ([id_data ] * K , ignore_index = True )
113
+ if num_cols_adjusted > 0 :
114
+ mdata [col ] = concat ([id_data ] * num_cols_adjusted , ignore_index = True )
129
115
else :
130
116
# We can't concat empty list. (GH 46044)
131
117
mdata [col ] = type (id_data )([], name = id_data .name , dtype = id_data .dtype )
132
118
else :
133
- mdata [col ] = np .tile (id_data ._values , K )
119
+ mdata [col ] = np .tile (id_data ._values , num_cols_adjusted )
134
120
135
121
mcolumns = id_vars + var_name + [value_name ]
136
122
@@ -143,12 +129,12 @@ def melt(
143
129
else :
144
130
mdata [value_name ] = frame ._values .ravel ("F" )
145
131
for i , col in enumerate (var_name ):
146
- mdata [col ] = frame .columns ._get_level_values (i ).repeat (N )
132
+ mdata [col ] = frame .columns ._get_level_values (i ).repeat (num_rows )
147
133
148
134
result = frame ._constructor (mdata , columns = mcolumns )
149
135
150
136
if not ignore_index :
151
- result .index = tile_compat (frame .index , K )
137
+ result .index = tile_compat (frame .index , num_cols_adjusted )
152
138
153
139
return result
154
140
0 commit comments