diff --git a/doc/source/whatsnew/v0.20.2.txt b/doc/source/whatsnew/v0.20.2.txt
index d5fd879d3f9bf..b53a9dd06b2a5 100644
--- a/doc/source/whatsnew/v0.20.2.txt
+++ b/doc/source/whatsnew/v0.20.2.txt
@@ -80,8 +80,10 @@ Reshaping
 ^^^^^^^^^
 
 - Bug in ``DataFrame.stack`` with unsorted levels in MultiIndex columns (:issue:`16323`)
+- Bug in ``pd.wide_to_long()`` where no error was raised when ``i`` was not a unique identifier (:issue:`16382`)
 - Bug in ``Series.isin(..)`` with a list of tuples (:issue:`16394`)
 
+
 Numeric
 ^^^^^^^
 - Bug in .interpolate(), where limit_direction was not respected when limit=None (default) was passed (:issue:16282)
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index b0ed6d4c4b84d..f944dfe22361a 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -1046,6 +1046,9 @@ def melt_stub(df, stub, i, j, value_vars, sep):
     else:
         i = list(i)
 
+    if df[i].duplicated().any():
+        raise ValueError("the id variables need to uniquely identify each row")
+
     value_vars = list(map(lambda stub:
                           get_var_names(df, stub, sep, suffix), stubnames))
 
diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py
index 79626d89026a7..d47a95924bd10 100644
--- a/pandas/tests/reshape/test_reshape.py
+++ b/pandas/tests/reshape/test_reshape.py
@@ -976,3 +976,14 @@ def test_multiple_id_columns(self):
         exp_frame = exp_frame.set_index(['famid', 'birth', 'age'])[['ht']]
         long_frame = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age')
         tm.assert_frame_equal(long_frame, exp_frame)
+
+    def test_non_unique_idvars(self):
+        # GH16382
+        # Raise an error message if non unique id vars (i) are passed
+        df = pd.DataFrame({
+            'A_A1': [1, 2, 3, 4, 5],
+            'B_B1': [1, 2, 3, 4, 5],
+            'x': [1, 1, 1, 1, 1]
+        })
+        with pytest.raises(ValueError):
+            wide_to_long(df, ['A_A', 'B_B'], i='x', j='colname')