diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 9a7a9c2a87e52..5f7526235a4c3 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -200,7 +200,9 @@ Reshaping by Melt The ``melt`` function found in ``pandas.core.reshape`` is useful to massage a DataFrame into a format where one or more columns are identifier variables, while all other columns, considered measured variables, are "pivoted" to the -row axis, leaving just two non-identifier columns, "variable" and "value". +row axis, leaving just two non-identifier columns, "variable" and "value". The +names of those columns can be customized by supplying the ``var_name`` and +``value_name`` parameters. For instance, @@ -212,6 +214,7 @@ For instance, 'weight' : [130, 150]}) cheese melt(cheese, id_vars=['first', 'last']) + melt(cheese, id_vars=['first', 'last'], var_name='quantity') Combining with stats and GroupBy -------------------------------- diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt index a42765591c818..13d08a9fc9c76 100644 --- a/doc/source/v0.11.1.txt +++ b/doc/source/v0.11.1.txt @@ -138,6 +138,9 @@ Enhancements import os os.remove('mi.csv') + - ``pd.melt()`` now accepts the optional parameters ``var_name`` and ``value_name`` + to specify custom column names of the returned DataFrame. + Bug Fixes ~~~~~~~~~ diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index b2e5bb01f53af..4e0f35f5d9555 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -600,7 +600,8 @@ def _stack_multi_columns(frame, level=-1, dropna=True): return result -def melt(frame, id_vars=None, value_vars=None): +def melt(frame, id_vars=None, value_vars=None, + var_name='variable', value_name='value'): """ "Unpivots" a DataFrame from wide format to long format, optionally leaving id variables set @@ -608,8 +609,10 @@ def melt(frame, id_vars=None, value_vars=None): Parameters ---------- frame : DataFrame - id_vars : - value_vars : + id_vars : tuple, list, or ndarray + value_vars : tuple, list, or ndarray + var_name : scalar + value_name : scalar Examples -------- @@ -621,9 +624,16 @@ def melt(frame, id_vars=None, value_vars=None): >>> melt(df, id_vars=['A'], value_vars=['B']) A variable value - a B 1 - b B 3 - c B 5 + a B 1 + b B 3 + c B 5 + + >>> melt(df, id_vars=['A'], value_vars=['B'], + ... var_name='myVarname', value_name='myValname') + A myVarname myValname + a B 1 + b B 3 + c B 5 """ # TODO: what about the existing index? if id_vars is not None: @@ -648,11 +658,11 @@ def melt(frame, id_vars=None, value_vars=None): for col in id_vars: mdata[col] = np.tile(frame.pop(col).values, K) - mcolumns = id_vars + ['variable', 'value'] + mcolumns = id_vars + [var_name, value_name] - mdata['value'] = frame.values.ravel('F') - - mdata['variable'] = np.asarray(frame.columns).repeat(N) + mdata[value_name] = frame.values.ravel('F') + mdata[var_name] = np.asarray(frame.columns).repeat(N) + return DataFrame(mdata, columns=mcolumns) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 278e745c7d312..5ddb30b0e1377 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -25,14 +25,73 @@ def test_melt(): df['id1'] = (df['A'] > 0).astype(int) df['id2'] = (df['B'] > 0).astype(int) - molten1 = melt(df) - molten2 = melt(df, id_vars=['id1']) - molten3 = melt(df, id_vars=['id1', 'id2']) - molten4 = melt(df, id_vars=['id1', 'id2'], + var_name = 'var' + value_name = 'val' + + # Default column names + result = melt(df) + result1 = melt(df, id_vars=['id1']) + result2 = melt(df, id_vars=['id1', 'id2']) + result3 = melt(df, id_vars=['id1', 'id2'], value_vars='A') - molten5 = melt(df, id_vars=['id1', 'id2'], + result4 = melt(df, id_vars=['id1', 'id2'], value_vars=['A', 'B']) - + + expected4 = DataFrame({'id1': df['id1'].tolist() * 2, + 'id2': df['id2'].tolist() * 2, + 'variable': ['A']*10 + ['B']*10, + 'value': df['A'].tolist() + df['B'].tolist()}, + columns=['id1', 'id2', 'variable', 'value']) + tm.assert_frame_equal(result4, expected4) + + # Supply custom name for the 'variable' column + result5 = melt(df, var_name=var_name) + result6 = melt(df, id_vars=['id1'], var_name=var_name) + result7 = melt(df, id_vars=['id1', 'id2'], var_name=var_name) + result8 = melt(df, id_vars=['id1', 'id2'], + value_vars='A', var_name=var_name) + result9 = melt(df, id_vars=['id1', 'id2'], + value_vars=['A', 'B'], var_name=var_name) + + expected9 = DataFrame({'id1': df['id1'].tolist() * 2, + 'id2': df['id2'].tolist() * 2, + var_name: ['A']*10 + ['B']*10, + 'value': df['A'].tolist() + df['B'].tolist()}, + columns=['id1', 'id2', var_name, 'value']) + tm.assert_frame_equal(result9, expected9) + + # Supply custom name for the 'value' column + result10 = melt(df, value_name=value_name) + result11 = melt(df, id_vars=['id1'], value_name=value_name) + result12 = melt(df, id_vars=['id1', 'id2'], value_name=value_name) + result13 = melt(df, id_vars=['id1', 'id2'], + value_vars='A', value_name=value_name) + result14 = melt(df, id_vars=['id1', 'id2'], + value_vars=['A', 'B'], value_name=value_name) + + expected14 = DataFrame({'id1': df['id1'].tolist() * 2, + 'id2': df['id2'].tolist() * 2, + 'variable': ['A']*10 + ['B']*10, + value_name: df['A'].tolist() + df['B'].tolist()}, + columns=['id1', 'id2', 'variable', value_name]) + tm.assert_frame_equal(result14, expected14) + + # Supply custom names for the 'variable' and 'value' columns + result15 = melt(df, var_name=var_name, value_name=value_name) + result16 = melt(df, id_vars=['id1'], var_name=var_name, value_name=value_name) + result17 = melt(df, id_vars=['id1', 'id2'], + var_name=var_name, value_name=value_name) + result18 = melt(df, id_vars=['id1', 'id2'], + value_vars='A', var_name=var_name, value_name=value_name) + result19 = melt(df, id_vars=['id1', 'id2'], + value_vars=['A', 'B'], var_name=var_name, value_name=value_name) + + expected19 = DataFrame({'id1': df['id1'].tolist() * 2, + 'id2': df['id2'].tolist() * 2, + var_name: ['A']*10 + ['B']*10, + value_name: df['A'].tolist() + df['B'].tolist()}, + columns=['id1', 'id2', var_name, value_name]) + tm.assert_frame_equal(result19, expected19) def test_convert_dummies(): df = DataFrame({'A': ['foo', 'bar', 'foo', 'bar',