Skip to content

Commit 3e01c38

Browse files
michaelsilverstein authored and jreback committed
Melting with not present column does not produce error (#23575)
1 parent 8a2238c commit 3e01c38

File tree

3 files changed

+71
-0
lines changed

3 files changed

+71
-0
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1442,6 +1442,7 @@ Reshaping
14421442
- Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue:`22796`)
14431443
- Bug in :func:`merge_asof` where confusing error message raised when attempting to merge with missing values (:issue:`23189`)
14441444
- Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have a :class:`MultiIndex` for columns (:issue:`23033`).
1445+
- Bug in :func:`pandas.melt` where passing column names that are not present in the ``DataFrame`` did not raise an error (:issue:`23575`)
14451446
- Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`)
14461447
- Bug in ``Series`` construction when passing no data and ``dtype=str`` (:issue:`22477`)
14471448

pandas/core/reshape/melt.py

+19
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
from pandas import compat
1414
from pandas.core.arrays import Categorical
1515
from pandas.core.frame import _shared_docs
16+
from pandas.core.indexes.base import Index
1617
from pandas.core.reshape.concat import concat
1718
from pandas.core.tools.numeric import to_numeric
1819

@@ -24,6 +25,12 @@
2425
def melt(frame, id_vars=None, value_vars=None, var_name=None,
2526
value_name='value', col_level=None):
2627
# TODO: what about the existing index?
28+
# If multiindex, gather names of columns on all levels for checking presence
29+
# of `id_vars` and `value_vars`
30+
if isinstance(frame.columns, ABCMultiIndex):
31+
cols = [x for c in frame.columns for x in c]
32+
else:
33+
cols = list(frame.columns)
2734
if id_vars is not None:
2835
if not is_list_like(id_vars):
2936
id_vars = [id_vars]
@@ -32,7 +39,13 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
3239
raise ValueError('id_vars must be a list of tuples when columns'
3340
' are a MultiIndex')
3441
else:
42+
# Check that `id_vars` are in frame
3543
id_vars = list(id_vars)
44+
missing = Index(np.ravel(id_vars)).difference(cols)
45+
if not missing.empty:
46+
raise KeyError("The following 'id_vars' are not present"
47+
" in the DataFrame: {missing}"
48+
"".format(missing=list(missing)))
3649
else:
3750
id_vars = []
3851

@@ -45,6 +58,12 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None,
4558
' columns are a MultiIndex')
4659
else:
4760
value_vars = list(value_vars)
61+
# Check that `value_vars` are in frame
62+
missing = Index(np.ravel(value_vars)).difference(cols)
63+
if not missing.empty:
64+
raise KeyError("The following 'value_vars' are not present in"
65+
" the DataFrame: {missing}"
66+
"".format(missing=list(missing)))
4867
frame = frame.loc[:, id_vars + value_vars]
4968
else:
5069
frame = frame.copy()

pandas/tests/reshape/test_melt.py

+51
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,14 @@ def test_vars_work_with_multiindex(self):
101101
result = self.df1.melt(id_vars=[('A', 'a')], value_vars=[('B', 'b')])
102102
tm.assert_frame_equal(result, expected)
103103

104+
def test_single_vars_work_with_multiindex(self):
105+
expected = DataFrame({
106+
'A': {0: 1.067683, 1: -1.321405, 2: -0.807333},
107+
'CAP': {0: 'B', 1: 'B', 2: 'B'},
108+
'value': {0: -1.110463, 1: 0.368915, 2: 0.08298}})
109+
result = self.df1.melt(['A'], ['B'], col_level=0)
110+
tm.assert_frame_equal(result, expected)
111+
104112
def test_tuple_vars_fail_with_multiindex(self):
105113
# melt should fail with an informative error message if
106114
# the columns have a MultiIndex and a tuple is passed
@@ -233,6 +241,49 @@ def test_pandas_dtypes(self, col):
233241
expected.columns = ['klass', 'col', 'attribute', 'value']
234242
tm.assert_frame_equal(result, expected)
235243

244+
def test_melt_missing_columns_raises(self):
245+
# GH-23575
246+
# This test is to ensure that pandas raises an error if melting is
247+
# attempted with column names absent from the dataframe
248+
249+
# Generate data
250+
df = pd.DataFrame(np.random.randn(5, 4), columns=list('abcd'))
251+
252+
# Try to melt with missing `value_vars` column name
253+
msg = "The following '{Var}' are not present in the DataFrame: {Col}"
254+
with pytest.raises(
255+
KeyError,
256+
match=msg.format(Var='value_vars', Col="\\['C'\\]")):
257+
df.melt(['a', 'b'], ['C', 'd'])
258+
259+
# Try to melt with missing `id_vars` column name
260+
with pytest.raises(
261+
KeyError,
262+
match=msg.format(Var='id_vars', Col="\\['A'\\]")):
263+
df.melt(['A', 'b'], ['c', 'd'])
264+
265+
# Multiple missing
266+
with pytest.raises(
267+
KeyError,
268+
match=msg.format(Var='id_vars',
269+
Col="\\['not_here', 'or_there'\\]")):
270+
df.melt(['a', 'b', 'not_here', 'or_there'], ['c', 'd'])
271+
272+
# Multiindex melt fails if column is missing from multilevel melt
273+
multi = df.copy()
274+
multi.columns = [list('ABCD'), list('abcd')]
275+
with pytest.raises(
276+
KeyError,
277+
match=msg.format(Var='id_vars',
278+
Col="\\['E'\\]")):
279+
multi.melt([('E', 'a')], [('B', 'b')])
280+
# Multiindex fails if column is missing from single level melt
281+
with pytest.raises(
282+
KeyError,
283+
match=msg.format(Var='value_vars',
284+
Col="\\['F'\\]")):
285+
multi.melt(['A'], ['F'], col_level=0)
286+
236287

237288
class TestLreshape(object):
238289

0 commit comments

Comments
 (0)