diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 0ebe57bfbb3a1..cfae9a8bf504e 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -337,6 +337,7 @@ Indexing - Bug in :meth:`DataFrame.loc:` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`) - Bug in :meth:`Series.__getitem__` indexing with non-standard scalars, e.g. ``np.dtype`` (:issue:`32684`) - Bug in :class:`Index` constructor where an unhelpful error message was raised for ``numpy`` scalars (:issue:`33017`) +- Bug in :meth:`DataFrame.lookup` incorrectly raising an ``AttributeError`` when ``frame.index`` or ``frame.columns`` is not unique; this will now raise a ``ValueError`` with a helpful error message (:issue:`33041`) Missing ^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 73906033f8f7f..3fbe171a1dade 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3525,6 +3525,9 @@ def lookup(self, row_labels, col_labels) -> np.ndarray: n = len(row_labels) if n != len(col_labels): raise ValueError("Row labels must have same size as column labels") + if not (self.index.is_unique and self.columns.is_unique): + # GH#33041 + raise ValueError("DataFrame.lookup requires unique index and columns") thresh = 1000 if not self._is_mixed_type or n > thresh: diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 923447889d04c..a7aacc9e0968a 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1424,6 +1424,24 @@ def test_lookup_raises(self, float_frame): with pytest.raises(ValueError, match="same size"): float_frame.lookup(["a", "b", "c"], ["a"]) + def test_lookup_requires_unique_axes(self): + # GH#33041 raise with a helpful error message + df = pd.DataFrame(np.random.randn(6).reshape(3, 2), columns=["A", "A"]) + + rows = [0, 1] + cols = ["A", "A"] + + # homogeneous-dtype case + with pytest.raises(ValueError, match="requires unique index and columns"): + df.lookup(rows, cols) + with pytest.raises(ValueError, match="requires unique index and columns"): + df.T.lookup(cols, rows) + + # heterogeneous dtype + df["B"] = 0 + with pytest.raises(ValueError, match="requires unique index and columns"): + df.lookup(rows, cols) + def test_set_value(self, float_frame): for idx in float_frame.index: for col in float_frame.columns: