Skip to content

Commit c8d85e2

Browse files
authored
ENH: Add optional argument index to pd.melt to maintain index values (#33659)
1 parent 9b95eeb commit c8d85e2

File tree

6 files changed

+86
-3
lines changed

6 files changed

+86
-3
lines changed

doc/source/user_guide/reshaping.rst

+16
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,22 @@ For instance,
296296
cheese.melt(id_vars=['first', 'last'])
297297
cheese.melt(id_vars=['first', 'last'], var_name='quantity')
298298
299+
When transforming a DataFrame using :func:`~pandas.melt`, the index will be ignored. The original index values can be kept by setting the ``ignore_index`` parameter to ``False`` (default is ``True``). This will, however, duplicate them: each index label is repeated once for every melted value column.
300+
301+
.. versionadded:: 1.1.0
302+
303+
.. ipython:: python
304+
305+
index = pd.MultiIndex.from_tuples([('person', 'A'), ('person', 'B')])
306+
cheese = pd.DataFrame({'first': ['John', 'Mary'],
307+
'last': ['Doe', 'Bo'],
308+
'height': [5.5, 6.0],
309+
'weight': [130, 150]},
310+
index=index)
311+
cheese
312+
cheese.melt(id_vars=['first', 'last'])
313+
cheese.melt(id_vars=['first', 'last'], ignore_index=False)
314+
299315
Another way to transform is to use the :func:`~pandas.wide_to_long` panel data
300316
convenience function. It is less flexible than :func:`~pandas.melt`, but more
301317
user-friendly.

doc/source/whatsnew/v1.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,7 @@ Other enhancements
297297
This can be used to set a custom compression level, e.g.,
298298
``df.to_csv(path, compression={'method': 'gzip', 'compresslevel': 1})``
299299
(:issue:`33196`)
300+
- :meth:`DataFrame.melt` and :func:`melt` have gained an ``ignore_index`` (default ``True``) argument that, if set to ``False``, keeps the original index instead of dropping it (:issue:`17440`).
300301
- :meth:`Series.update` now accepts objects that can be coerced to a :class:`Series`,
301302
such as ``dict`` and ``list``, mirroring the behavior of :meth:`DataFrame.update` (:issue:`33215`)
302303
- :meth:`~pandas.core.groupby.GroupBy.transform` and :meth:`~pandas.core.groupby.GroupBy.aggregate` have gained ``engine`` and ``engine_kwargs`` arguments that support executing functions with ``Numba`` (:issue:`32854`, :issue:`33388`)
@@ -1168,3 +1169,4 @@ Other
11681169

11691170
Contributors
11701171
~~~~~~~~~~~~
1172+

pandas/core/frame.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -2145,7 +2145,7 @@ def to_stata(
21452145
from pandas.io.stata import StataWriter117 as statawriter # type: ignore
21462146
else: # versions 118 and 119
21472147
# mypy: Name 'statawriter' already defined (possibly by an import)
2148-
from pandas.io.stata import StataWriterUTF8 as statawriter # type:ignore
2148+
from pandas.io.stata import StataWriterUTF8 as statawriter # type: ignore
21492149

21502150
kwargs: Dict[str, Any] = {}
21512151
if version is None or version >= 117:
@@ -7105,6 +7105,7 @@ def melt(
71057105
var_name=None,
71067106
value_name="value",
71077107
col_level=None,
7108+
ignore_index=True,
71087109
) -> "DataFrame":
71097110

71107111
return melt(
@@ -7114,6 +7115,7 @@ def melt(
71147115
var_name=var_name,
71157116
value_name=value_name,
71167117
col_level=col_level,
7118+
ignore_index=ignore_index,
71177119
)
71187120

71197121
# ----------------------------------------------------------------------

pandas/core/reshape/melt.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import pandas.core.common as com
1515
from pandas.core.indexes.api import Index, MultiIndex
1616
from pandas.core.reshape.concat import concat
17+
from pandas.core.reshape.util import _tile_compat
1718
from pandas.core.shared_docs import _shared_docs
1819
from pandas.core.tools.numeric import to_numeric
1920

@@ -32,8 +33,8 @@ def melt(
3233
var_name=None,
3334
value_name="value",
3435
col_level=None,
36+
ignore_index: bool = True,
3537
) -> "DataFrame":
36-
# TODO: what about the existing index?
3738
# If multiindex, gather names of columns on all level for checking presence
3839
# of `id_vars` and `value_vars`
3940
if isinstance(frame.columns, MultiIndex):
@@ -132,7 +133,12 @@ def melt(
132133
# asanyarray will keep the columns as an Index
133134
mdata[col] = np.asanyarray(frame.columns._get_level_values(i)).repeat(N)
134135

135-
return frame._constructor(mdata, columns=mcolumns)
136+
result = frame._constructor(mdata, columns=mcolumns)
137+
138+
if not ignore_index:
139+
result.index = _tile_compat(frame.index, K)
140+
141+
return result
136142

137143

138144
@deprecate_kwarg(old_arg_name="label", new_arg_name=None)

pandas/core/shared_docs.py

+16
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,11 @@
2828
Name to use for the 'value' column.
2929
col_level : int or str, optional
3030
If columns are a MultiIndex then use this level to melt.
31+
ignore_index : bool, default True
32+
If True, the original index is ignored. If False, the original index is retained.
33+
Index labels will be repeated as necessary.
34+
35+
.. versionadded:: 1.1.0
3136
3237
Returns
3338
-------
@@ -78,6 +83,17 @@
7883
1 b B 3
7984
2 c B 5
8085
86+
Original index values can be kept around:
87+
88+
>>> %(caller)sid_vars=['A'], value_vars=['B', 'C'], ignore_index=False)
89+
A variable value
90+
0 a B 1
91+
1 b B 3
92+
2 c B 5
93+
0 a C 2
94+
1 b C 4
95+
2 c C 6
96+
8197
If you have multi-index columns:
8298
8399
>>> df.columns = [list('ABC'), list('DEF')]

pandas/tests/reshape/test_melt.py

+41
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,47 @@ def test_melt_mixed_int_str_value_vars(self):
357357
expected = DataFrame({"variable": [0, "a"], "value": ["foo", "bar"]})
358358
tm.assert_frame_equal(result, expected)
359359

360+
def test_ignore_index(self):
361+
# GH 17440
362+
df = DataFrame({"foo": [0], "bar": [1]}, index=["first"])
363+
result = melt(df, ignore_index=False)
364+
expected = DataFrame(
365+
{"variable": ["foo", "bar"], "value": [0, 1]}, index=["first", "first"]
366+
)
367+
tm.assert_frame_equal(result, expected)
368+
369+
def test_ignore_multiindex(self):
370+
# GH 17440
371+
index = pd.MultiIndex.from_tuples(
372+
[("first", "second"), ("first", "third")], names=["baz", "foobar"]
373+
)
374+
df = DataFrame({"foo": [0, 1], "bar": [2, 3]}, index=index)
375+
result = melt(df, ignore_index=False)
376+
377+
expected_index = pd.MultiIndex.from_tuples(
378+
[("first", "second"), ("first", "third")] * 2, names=["baz", "foobar"]
379+
)
380+
expected = DataFrame(
381+
{"variable": ["foo"] * 2 + ["bar"] * 2, "value": [0, 1, 2, 3]},
382+
index=expected_index,
383+
)
384+
385+
tm.assert_frame_equal(result, expected)
386+
387+
def test_ignore_index_name_and_type(self):
388+
# GH 17440
389+
index = pd.Index(["foo", "bar"], dtype="category", name="baz")
390+
df = DataFrame({"x": [0, 1], "y": [2, 3]}, index=index)
391+
result = melt(df, ignore_index=False)
392+
393+
expected_index = pd.Index(["foo", "bar"] * 2, dtype="category", name="baz")
394+
expected = DataFrame(
395+
{"variable": ["x", "x", "y", "y"], "value": [0, 1, 2, 3]},
396+
index=expected_index,
397+
)
398+
399+
tm.assert_frame_equal(result, expected)
400+
360401

361402
class TestLreshape:
362403
def test_pairs(self):

0 commit comments

Comments
 (0)