ENH: implement value_vars for melt and add melt to the pandas namespace (#2412)

changhiskhan · wesm · commit 637601160efe · 2012-12-07T10:52:43.000-05:00
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -91,6 +91,7 @@ pandas 0.10.0
     structures, which should do the right thing on both py2.x and py3.x. (#2224)
   - Reduce groupby.apply overhead substantially by low-level manipulation of
     internal NumPy arrays in DataFrames (#535)
+  - Implement ``value_vars`` in ``melt`` and add ``melt`` to pandas namespace (#2412)
 
 **Bug fixes**
 
diff --git a/doc/source/v0.10.0.txt b/doc/source/v0.10.0.txt
@@ -111,6 +111,8 @@ Updated PyTables Support
        import os
        os.remove('store.h5')
 
+  - Implement ``value_vars`` in ``melt`` and add ``melt`` to pandas namespace (GH2412_)
+
 API changes
 ~~~~~~~~~~~
 
@@ -157,3 +159,4 @@ on GitHub for a complete list.
 .. _GH2097: https://github.com/pydata/pandas/issues/2097
 .. _GH2224: https://github.com/pydata/pandas/issues/2224
 .. _GH2431: https://github.com/pydata/pandas/issues/2431
+.. _GH2412: https://github.com/pydata/pandas/issues/2412
diff --git a/pandas/__init__.py b/pandas/__init__.py
@@ -40,3 +40,4 @@
 from pandas.tools.pivot import pivot_table, crosstab
 from pandas.tools.plotting import scatter_matrix, plot_params
 from pandas.tools.tile import cut, qcut
+from pandas.core.reshape import melt
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
@@ -528,29 +528,34 @@ def melt(frame, id_vars=None, value_vars=None):
     b 3 4
     c 5 6
 
-    >>> melt(df, id_vars=['A'])
+    >>> melt(df, id_vars=['A'], value_vars=['B'])
     A variable value
     a B        1
     b B        3
     c B        5
-    a C        2
-    b C        4
-    c C        6
     """
     # TODO: what about the existing index?
+    if id_vars is not None:
+        if not isinstance(id_vars, (tuple, list, np.ndarray)):
+            id_vars = [id_vars]
+        else:
+            id_vars = list(id_vars)
+    else:
+        id_vars = []
+
+    if value_vars is not None:
+        if not isinstance(value_vars, (tuple, list, np.ndarray)):
+            value_vars = [value_vars]
+        frame = frame.ix[:, id_vars + value_vars]
+    else:
+        frame = frame.copy()
 
     N, K = frame.shape
+    K -= len(id_vars)
 
     mdata = {}
-
-    if id_vars is not None:
-        id_vars = list(id_vars)
-        frame = frame.copy()
-        K -= len(id_vars)
-        for col in id_vars:
-            mdata[col] = np.tile(frame.pop(col).values, K)
-    else:
-        id_vars = []
+    for col in id_vars:
+        mdata[col] = np.tile(frame.pop(col).values, K)
 
     mcolumns = id_vars + ['variable', 'value']
 
diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py
@@ -27,6 +27,10 @@ def test_melt():
     molten1 = melt(df)
     molten2 = melt(df, id_vars=['id1'])
     molten3 = melt(df, id_vars=['id1', 'id2'])
+    molten4 = melt(df, id_vars=['id1', 'id2'],
+                   value_vars='A')
+    molten5 = melt(df, id_vars=['id1', 'id2'],
+                   value_vars=['A', 'B'])
 
 def test_convert_dummies():
     df = DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
diff --git a/vb_suite/reshape.py b/vb_suite/reshape.py
@@ -49,3 +49,14 @@ def unpivot(frame):
 unstack_sparse_keyspace = Benchmark('idf.unstack()', setup,
                                     start_date=datetime(2011, 10, 1))
 
+# Melt: unpivot a 10000-row frame with three value columns and two id columns
+
+setup = common_setup + """
+from pandas.core.reshape import melt
+df = DataFrame(np.random.randn(10000, 3), columns=['A', 'B', 'C'])
+df['id1'] = np.random.randint(0, 10, 10000)
+df['id2'] = np.random.randint(100, 1000, 10000)
+"""
+
+melt_dataframe = Benchmark("melt(df, id_vars=['id1', 'id2'])", setup,
+                           start_date=datetime(2012, 8, 1))