ENH: Add optional argument index to pd.melt to maintain index values (#33659)

Rik-de-Kort · web-flow · commit c8d85e23b1f7 · 2020-07-09T19:34:44.000-04:00
diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst
@@ -296,6 +296,22 @@ For instance,
    cheese.melt(id_vars=['first', 'last'])
    cheese.melt(id_vars=['first', 'last'], var_name='quantity')
 
+When transforming a DataFrame using :func:`~pandas.melt`, the index will be ignored. The original index values can be kept around by setting the ``ignore_index`` parameter to ``False`` (default is ``True``). This will however duplicate them.
+
+.. versionadded:: 1.1.0
+
+.. ipython:: python
+
+   index = pd.MultiIndex.from_tuples([('person', 'A'), ('person', 'B')])
+   cheese = pd.DataFrame({'first': ['John', 'Mary'],
+                          'last': ['Doe', 'Bo'],
+                          'height': [5.5, 6.0],
+                          'weight': [130, 150]},
+                         index=index)
+   cheese
+   cheese.melt(id_vars=['first', 'last'])
+   cheese.melt(id_vars=['first', 'last'], ignore_index=False)
+
 Another way to transform is to use the :func:`~pandas.wide_to_long` panel data
 convenience function. It is less flexible than :func:`~pandas.melt`, but more
 user-friendly.
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -297,6 +297,7 @@ Other enhancements
   This can be used to set a custom compression level, e.g.,
   ``df.to_csv(path, compression={'method': 'gzip', 'compresslevel': 1}``
   (:issue:`33196`)
+- :meth:`melt` has gained an ``ignore_index`` (default ``True``) argument that, if set to ``False``, prevents the method from dropping the index (:issue:`17440`).
 - :meth:`Series.update` now accepts objects that can be coerced to a :class:`Series`,
   such as ``dict`` and ``list``, mirroring the behavior of :meth:`DataFrame.update` (:issue:`33215`)
 - :meth:`~pandas.core.groupby.GroupBy.transform` and :meth:`~pandas.core.groupby.GroupBy.aggregate` has gained ``engine`` and ``engine_kwargs`` arguments that supports executing functions with ``Numba`` (:issue:`32854`, :issue:`33388`)
@@ -1168,3 +1169,4 @@ Other
 
 Contributors
 ~~~~~~~~~~~~
+
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2145,7 +2145,7 @@ def to_stata(
             from pandas.io.stata import StataWriter117 as statawriter  # type: ignore
         else:  # versions 118 and 119
             # mypy: Name 'statawriter' already defined (possibly by an import)
-            from pandas.io.stata import StataWriterUTF8 as statawriter  # type:ignore
+            from pandas.io.stata import StataWriterUTF8 as statawriter  # type: ignore
 
         kwargs: Dict[str, Any] = {}
         if version is None or version >= 117:
@@ -7105,6 +7105,7 @@ def melt(
         var_name=None,
         value_name="value",
         col_level=None,
+        ignore_index=True,
     ) -> "DataFrame":
 
         return melt(
@@ -7114,6 +7115,7 @@ def melt(
             var_name=var_name,
             value_name=value_name,
             col_level=col_level,
+            ignore_index=ignore_index,
         )
 
     # ----------------------------------------------------------------------
diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py
@@ -14,6 +14,7 @@
 import pandas.core.common as com
 from pandas.core.indexes.api import Index, MultiIndex
 from pandas.core.reshape.concat import concat
+from pandas.core.reshape.util import _tile_compat
 from pandas.core.shared_docs import _shared_docs
 from pandas.core.tools.numeric import to_numeric
 
@@ -32,8 +33,8 @@ def melt(
     var_name=None,
     value_name="value",
     col_level=None,
+    ignore_index: bool = True,
 ) -> "DataFrame":
-    # TODO: what about the existing index?
     # If multiindex, gather names of columns on all level for checking presence
     # of `id_vars` and `value_vars`
     if isinstance(frame.columns, MultiIndex):
@@ -132,7 +133,12 @@ def melt(
         # asanyarray will keep the columns as an Index
         mdata[col] = np.asanyarray(frame.columns._get_level_values(i)).repeat(N)
 
-    return frame._constructor(mdata, columns=mcolumns)
+    result = frame._constructor(mdata, columns=mcolumns)
+
+    if not ignore_index:
+        result.index = _tile_compat(frame.index, K)
+
+    return result
 
 
 @deprecate_kwarg(old_arg_name="label", new_arg_name=None)
diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
@@ -28,6 +28,11 @@
         Name to use for the 'value' column.
     col_level : int or str, optional
         If columns are a MultiIndex then use this level to melt.
+    ignore_index : bool, default True
+        If True, original index is ignored. If False, the original index is retained.
+        Index labels will be repeated as necessary.
+
+        .. versionadded:: 1.1.0
 
     Returns
     -------
@@ -78,6 +83,17 @@
     1  b         B          3
     2  c         B          5
 
+    Original index values can be kept around:
+
+    >>> %(caller)sid_vars=['A'], value_vars=['B', 'C'], ignore_index=False)
+       A variable  value
+    0  a        B      1
+    1  b        B      3
+    2  c        B      5
+    0  a        C      2
+    1  b        C      4
+    2  c        C      6
+
     If you have multi-index columns:
 
     >>> df.columns = [list('ABC'), list('DEF')]
diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py
@@ -357,6 +357,47 @@ def test_melt_mixed_int_str_value_vars(self):
         expected = DataFrame({"variable": [0, "a"], "value": ["foo", "bar"]})
         tm.assert_frame_equal(result, expected)
 
+    def test_ignore_index(self):
+        # GH 17440
+        df = DataFrame({"foo": [0], "bar": [1]}, index=["first"])
+        result = melt(df, ignore_index=False)
+        expected = DataFrame(
+            {"variable": ["foo", "bar"], "value": [0, 1]}, index=["first", "first"]
+        )
+        tm.assert_frame_equal(result, expected)
+
+    def test_ignore_multiindex(self):
+        # GH 17440
+        index = pd.MultiIndex.from_tuples(
+            [("first", "second"), ("first", "third")], names=["baz", "foobar"]
+        )
+        df = DataFrame({"foo": [0, 1], "bar": [2, 3]}, index=index)
+        result = melt(df, ignore_index=False)
+
+        expected_index = pd.MultiIndex.from_tuples(
+            [("first", "second"), ("first", "third")] * 2, names=["baz", "foobar"]
+        )
+        expected = DataFrame(
+            {"variable": ["foo"] * 2 + ["bar"] * 2, "value": [0, 1, 2, 3]},
+            index=expected_index,
+        )
+
+        tm.assert_frame_equal(result, expected)
+
+    def test_ignore_index_name_and_type(self):
+        # GH 17440
+        index = pd.Index(["foo", "bar"], dtype="category", name="baz")
+        df = DataFrame({"x": [0, 1], "y": [2, 3]}, index=index)
+        result = melt(df, ignore_index=False)
+
+        expected_index = pd.Index(["foo", "bar"] * 2, dtype="category", name="baz")
+        expected = DataFrame(
+            {"variable": ["x", "x", "y", "y"], "value": [0, 1, 2, 3]},
+            index=expected_index,
+        )
+
+        tm.assert_frame_equal(result, expected)
+
 
 class TestLreshape:
     def test_pairs(self):