Skip to content

Commit 0378de5

Browse files
committed
Merge pull request #4144 from hayd/melt_name
ENH melt uses column name if available
2 parents 565ee0c + 920ef81 commit 0378de5

File tree

3 files changed

+146
-105
lines changed

3 files changed

+146
-105
lines changed

doc/source/release.rst

+4-3
Original file line numberDiff line numberDiff line change
@@ -75,13 +75,14 @@ pandas 0.12
7575
- Simplified the API and added a describe method to Categorical
7676
- ``melt`` now accepts the optional parameters ``var_name`` and ``value_name``
7777
to specify custom column names of the returned DataFrame (:issue:`3649`),
78-
thanks @hoechenberger
78+
thanks @hoechenberger. If ``var_name`` is not specified and ``dataframe.columns.name``
79+
is not None, then this will be used as the ``var_name`` (:issue:`4144`).
7980
- clipboard functions use pyperclip (no dependencies on Windows, alternative
8081
dependencies offered for Linux) (:issue:`3837`).
8182
- Plotting functions now raise a ``TypeError`` before trying to plot anything
8283
if the associated objects have a dtype of ``object`` (:issue:`1818`,
83-
:issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object arrays to
84-
numeric arrays if possible so that you can still plot, for example, an
84+
:issue:`3572`, :issue:`3911`, :issue:`3912`), but they will try to convert object
85+
arrays to numeric arrays if possible so that you can still plot, for example, an
8586
object array with floats. This happens before any drawing takes place which
8687
eliminates any spurious plots from showing up.
8788
- Added Faq section on repr display options, to help users customize their setup.

pandas/core/reshape.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -601,7 +601,7 @@ def _stack_multi_columns(frame, level=-1, dropna=True):
601601

602602

603603
def melt(frame, id_vars=None, value_vars=None,
604-
var_name='variable', value_name='value'):
604+
var_name=None, value_name='value'):
605605
"""
606606
"Unpivots" a DataFrame from wide format to long format, optionally leaving
607607
id variables set
@@ -611,8 +611,8 @@ def melt(frame, id_vars=None, value_vars=None,
611611
frame : DataFrame
612612
id_vars : tuple, list, or ndarray
613613
value_vars : tuple, list, or ndarray
614-
var_name : scalar
615-
value_name : scalar
614+
var_name : scalar, if None uses frame.columns.name or 'variable'
615+
value_name : scalar, default 'value'
616616
617617
Examples
618618
--------
@@ -634,6 +634,7 @@ def melt(frame, id_vars=None, value_vars=None,
634634
a B 1
635635
b B 3
636636
c B 5
637+
637638
"""
638639
# TODO: what about the existing index?
639640
if id_vars is not None:
@@ -651,6 +652,9 @@ def melt(frame, id_vars=None, value_vars=None,
651652
else:
652653
frame = frame.copy()
653654

655+
if var_name is None:
656+
var_name = frame.columns.name if frame.columns.name is not None else 'variable'
657+
654658
N, K = frame.shape
655659
K -= len(id_vars)
656660

pandas/tests/test_reshape.py

+135-99
Original file line numberDiff line numberDiff line change
@@ -20,105 +20,141 @@
2020
_multiprocess_can_split_ = True
2121

2222

23-
def test_melt():
24-
df = tm.makeTimeDataFrame()[:10]
25-
df['id1'] = (df['A'] > 0).astype(np.int64)
26-
df['id2'] = (df['B'] > 0).astype(np.int64)
27-
28-
var_name = 'var'
29-
value_name = 'val'
30-
31-
# Default column names
32-
result = melt(df)
33-
result1 = melt(df, id_vars=['id1'])
34-
result2 = melt(df, id_vars=['id1', 'id2'])
35-
result3 = melt(df, id_vars=['id1', 'id2'],
36-
value_vars='A')
37-
result4 = melt(df, id_vars=['id1', 'id2'],
38-
value_vars=['A', 'B'])
39-
40-
expected4 = DataFrame({'id1': df['id1'].tolist() * 2,
41-
'id2': df['id2'].tolist() * 2,
42-
'variable': ['A']*10 + ['B']*10,
43-
'value': df['A'].tolist() + df['B'].tolist()},
44-
columns=['id1', 'id2', 'variable', 'value'])
45-
tm.assert_frame_equal(result4, expected4)
46-
47-
# Supply custom name for the 'variable' column
48-
result5 = melt(df, var_name=var_name)
49-
result6 = melt(df, id_vars=['id1'], var_name=var_name)
50-
result7 = melt(df, id_vars=['id1', 'id2'], var_name=var_name)
51-
result8 = melt(df, id_vars=['id1', 'id2'],
52-
value_vars='A', var_name=var_name)
53-
result9 = melt(df, id_vars=['id1', 'id2'],
54-
value_vars=['A', 'B'], var_name=var_name)
55-
56-
expected9 = DataFrame({'id1': df['id1'].tolist() * 2,
57-
'id2': df['id2'].tolist() * 2,
58-
var_name: ['A']*10 + ['B']*10,
59-
'value': df['A'].tolist() + df['B'].tolist()},
60-
columns=['id1', 'id2', var_name, 'value'])
61-
tm.assert_frame_equal(result9, expected9)
62-
63-
# Supply custom name for the 'value' column
64-
result10 = melt(df, value_name=value_name)
65-
result11 = melt(df, id_vars=['id1'], value_name=value_name)
66-
result12 = melt(df, id_vars=['id1', 'id2'], value_name=value_name)
67-
result13 = melt(df, id_vars=['id1', 'id2'],
68-
value_vars='A', value_name=value_name)
69-
result14 = melt(df, id_vars=['id1', 'id2'],
70-
value_vars=['A', 'B'], value_name=value_name)
71-
72-
expected14 = DataFrame({'id1': df['id1'].tolist() * 2,
73-
'id2': df['id2'].tolist() * 2,
74-
'variable': ['A']*10 + ['B']*10,
75-
value_name: df['A'].tolist() + df['B'].tolist()},
76-
columns=['id1', 'id2', 'variable', value_name])
77-
tm.assert_frame_equal(result14, expected14)
78-
79-
# Supply custom names for the 'variable' and 'value' columns
80-
result15 = melt(df, var_name=var_name, value_name=value_name)
81-
result16 = melt(df, id_vars=['id1'], var_name=var_name, value_name=value_name)
82-
result17 = melt(df, id_vars=['id1', 'id2'],
83-
var_name=var_name, value_name=value_name)
84-
result18 = melt(df, id_vars=['id1', 'id2'],
85-
value_vars='A', var_name=var_name, value_name=value_name)
86-
result19 = melt(df, id_vars=['id1', 'id2'],
87-
value_vars=['A', 'B'], var_name=var_name, value_name=value_name)
88-
89-
expected19 = DataFrame({'id1': df['id1'].tolist() * 2,
90-
'id2': df['id2'].tolist() * 2,
91-
var_name: ['A']*10 + ['B']*10,
92-
value_name: df['A'].tolist() + df['B'].tolist()},
93-
columns=['id1', 'id2', var_name, value_name])
94-
tm.assert_frame_equal(result19, expected19)
95-
96-
def test_convert_dummies():
97-
df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
98-
'foo', 'bar', 'foo', 'foo'],
99-
'B': ['one', 'one', 'two', 'three',
100-
'two', 'two', 'one', 'three'],
101-
'C': np.random.randn(8),
102-
'D': np.random.randn(8)})
103-
104-
result = convert_dummies(df, ['A', 'B'])
105-
result2 = convert_dummies(df, ['A', 'B'], prefix_sep='.')
106-
107-
expected = DataFrame({'A_foo': [1, 0, 1, 0, 1, 0, 1, 1],
108-
'A_bar': [0, 1, 0, 1, 0, 1, 0, 0],
109-
'B_one': [1, 1, 0, 0, 0, 0, 1, 0],
110-
'B_two': [0, 0, 1, 0, 1, 1, 0, 0],
111-
'B_three': [0, 0, 0, 1, 0, 0, 0, 1],
112-
'C': df['C'].values,
113-
'D': df['D'].values},
114-
columns=result.columns, dtype=float)
115-
expected2 = expected.rename(columns=lambda x: x.replace('_', '.'))
116-
117-
tm.assert_frame_equal(result, expected)
118-
tm.assert_frame_equal(result2, expected2)
119-
120-
121-
class Test_lreshape(unittest.TestCase):
23+
class TestMelt(unittest.TestCase):
    """Tests for ``melt``: default and caller-supplied output column names.

    Each test works on ``self.df``, rebuilt in ``setUp``: the first ten rows
    of ``tm.makeTimeDataFrame()`` plus two int64 indicator columns, ``id1``
    (``A > 0``) and ``id2`` (``B > 0``).
    """

    def setUp(self):
        self.df = tm.makeTimeDataFrame()[:10]
        self.df['id1'] = (self.df['A'] > 0).astype(np.int64)
        self.df['id2'] = (self.df['B'] > 0).astype(np.int64)

        # Custom names passed to melt() by the tests below.
        self.var_name = 'var'
        self.value_name = 'val'

    def test_default_col_names(self):
        # With no names given, the melted columns are 'variable' and 'value'.
        result = melt(self.df)
        self.assertEqual(result.columns.tolist(), ['variable', 'value'])

        result1 = melt(self.df, id_vars=['id1'])
        self.assertEqual(result1.columns.tolist(),
                         ['id1', 'variable', 'value'])

        result2 = melt(self.df, id_vars=['id1', 'id2'])
        self.assertEqual(result2.columns.tolist(),
                         ['id1', 'id2', 'variable', 'value'])

    def test_value_vars(self):
        # A single value column keeps the row count at ten.
        result3 = melt(self.df, id_vars=['id1', 'id2'], value_vars='A')
        self.assertEqual(len(result3), 10)

        result4 = melt(self.df, id_vars=['id1', 'id2'],
                       value_vars=['A', 'B'])
        expected4 = DataFrame(
            {'id1': self.df['id1'].tolist() * 2,
             'id2': self.df['id2'].tolist() * 2,
             'variable': ['A']*10 + ['B']*10,
             'value': self.df['A'].tolist() + self.df['B'].tolist()},
            columns=['id1', 'id2', 'variable', 'value'])
        tm.assert_frame_equal(result4, expected4)

    def test_custom_var_name(self):
        result5 = melt(self.df, var_name=self.var_name)
        self.assertEqual(result5.columns.tolist(), ['var', 'value'])

        result6 = melt(self.df, id_vars=['id1'], var_name=self.var_name)
        self.assertEqual(result6.columns.tolist(), ['id1', 'var', 'value'])

        result7 = melt(self.df, id_vars=['id1', 'id2'],
                       var_name=self.var_name)
        self.assertEqual(result7.columns.tolist(),
                         ['id1', 'id2', 'var', 'value'])

        result8 = melt(self.df, id_vars=['id1', 'id2'],
                       value_vars='A', var_name=self.var_name)
        self.assertEqual(result8.columns.tolist(),
                         ['id1', 'id2', 'var', 'value'])

        result9 = melt(self.df, id_vars=['id1', 'id2'],
                       value_vars=['A', 'B'], var_name=self.var_name)
        expected9 = DataFrame(
            {'id1': self.df['id1'].tolist() * 2,
             'id2': self.df['id2'].tolist() * 2,
             self.var_name: ['A']*10 + ['B']*10,
             'value': self.df['A'].tolist() + self.df['B'].tolist()},
            columns=['id1', 'id2', self.var_name, 'value'])
        tm.assert_frame_equal(result9, expected9)

    def test_custom_value_name(self):
        result10 = melt(self.df, value_name=self.value_name)
        self.assertEqual(result10.columns.tolist(), ['variable', 'val'])

        result11 = melt(self.df, id_vars=['id1'],
                        value_name=self.value_name)
        self.assertEqual(result11.columns.tolist(),
                         ['id1', 'variable', 'val'])

        result12 = melt(self.df, id_vars=['id1', 'id2'],
                        value_name=self.value_name)
        self.assertEqual(result12.columns.tolist(),
                         ['id1', 'id2', 'variable', 'val'])

        result13 = melt(self.df, id_vars=['id1', 'id2'],
                        value_vars='A', value_name=self.value_name)
        self.assertEqual(result13.columns.tolist(),
                         ['id1', 'id2', 'variable', 'val'])

        result14 = melt(self.df, id_vars=['id1', 'id2'],
                        value_vars=['A', 'B'], value_name=self.value_name)
        expected14 = DataFrame(
            {'id1': self.df['id1'].tolist() * 2,
             'id2': self.df['id2'].tolist() * 2,
             'variable': ['A']*10 + ['B']*10,
             self.value_name: (self.df['A'].tolist() +
                               self.df['B'].tolist())},
            columns=['id1', 'id2', 'variable', self.value_name])
        tm.assert_frame_equal(result14, expected14)

    def test_custom_var_and_value_name(self):
        result15 = melt(self.df, var_name=self.var_name,
                        value_name=self.value_name)
        self.assertEqual(result15.columns.tolist(), ['var', 'val'])

        result16 = melt(self.df, id_vars=['id1'],
                        var_name=self.var_name, value_name=self.value_name)
        self.assertEqual(result16.columns.tolist(), ['id1', 'var', 'val'])

        result17 = melt(self.df, id_vars=['id1', 'id2'],
                        var_name=self.var_name, value_name=self.value_name)
        self.assertEqual(result17.columns.tolist(),
                         ['id1', 'id2', 'var', 'val'])

        # Was ``melt(df, ...)``: ``df`` is not defined in this scope.
        result18 = melt(self.df, id_vars=['id1', 'id2'],
                        value_vars='A', var_name=self.var_name,
                        value_name=self.value_name)
        self.assertEqual(result18.columns.tolist(),
                         ['id1', 'id2', 'var', 'val'])

        result19 = melt(self.df, id_vars=['id1', 'id2'],
                        value_vars=['A', 'B'], var_name=self.var_name,
                        value_name=self.value_name)
        # Keys were the bare names ``var_name`` / ``value_name``, which do
        # not exist here; the custom names live on ``self``.
        expected19 = DataFrame(
            {'id1': self.df['id1'].tolist() * 2,
             'id2': self.df['id2'].tolist() * 2,
             self.var_name: ['A']*10 + ['B']*10,
             self.value_name: (self.df['A'].tolist() +
                               self.df['B'].tolist())},
            columns=['id1', 'id2', self.var_name, self.value_name])
        tm.assert_frame_equal(result19, expected19)

    def test_columns_name_as_var_name(self):
        # Previously this method reused the name
        # ``test_custom_var_and_value_name``, which replaced the method above
        # in the class namespace so that it never ran.
        # When var_name is not given, melt falls back to frame.columns.name.
        self.df.columns.name = 'foo'
        result20 = melt(self.df)
        self.assertEqual(result20.columns.tolist(), ['foo', 'value'])
130+
131+
class TestConvertDummies(unittest.TestCase):
    """Checks ``convert_dummies`` with the default and a '.' prefix separator."""

    def test_convert_dummies(self):
        # Two string columns to be expanded, two random float columns that
        # are expected to come through unchanged.
        source = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                                  'foo', 'bar', 'foo', 'foo'],
                            'B': ['one', 'one', 'two', 'three',
                                  'two', 'two', 'one', 'three'],
                            'C': np.random.randn(8),
                            'D': np.random.randn(8)})

        with_underscore = convert_dummies(source, ['A', 'B'])
        with_dot = convert_dummies(source, ['A', 'B'], prefix_sep='.')

        indicator_data = {'A_foo': [1, 0, 1, 0, 1, 0, 1, 1],
                          'A_bar': [0, 1, 0, 1, 0, 1, 0, 0],
                          'B_one': [1, 1, 0, 0, 0, 0, 1, 0],
                          'B_two': [0, 0, 1, 0, 1, 1, 0, 0],
                          'B_three': [0, 0, 0, 1, 0, 0, 0, 1],
                          'C': source['C'].values,
                          'D': source['D'].values}
        # Column order is taken from the actual result, so only labels and
        # values are compared, not ordering.
        want_underscore = DataFrame(indicator_data,
                                    columns=with_underscore.columns,
                                    dtype=float)

        def _dotted(label):
            return label.replace('_', '.')

        want_dot = want_underscore.rename(columns=_dotted)

        tm.assert_frame_equal(with_underscore, want_underscore)
        tm.assert_frame_equal(with_dot, want_dot)
155+
156+
157+
class TestLreshape(unittest.TestCase):
122158

123159
def test_pairs(self):
124160
data = {'birthdt': ['08jan2009', '20dec2008', '30dec2008',

0 commit comments

Comments
 (0)