diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index 74abbc9503db0..b11baad1e3eb5 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -1480,17 +1480,27 @@ default value. s.get('a') # equivalent to s['a'] s.get('x', default=-1) -The :meth:`~pandas.DataFrame.lookup` method -------------------------------------------- +.. _indexing.lookup: + +Looking up values by index/column labels +---------------------------------------- Sometimes you want to extract a set of values given a sequence of row labels -and column labels, and the ``lookup`` method allows for this and returns a -NumPy array. For instance: +and column labels, this can be achieved by ``DataFrame.melt`` combined by filtering the corresponding +rows with ``DataFrame.loc``. For instance: .. ipython:: python - dflookup = pd.DataFrame(np.random.rand(20, 4), columns = ['A', 'B', 'C', 'D']) - dflookup.lookup(list(range(0, 10, 2)), ['B', 'C', 'A', 'B', 'D']) + df = pd.DataFrame({'col': ["A", "A", "B", "B"], + 'A': [80, 23, np.nan, 22], + 'B': [80, 55, 76, 67]}) + df + melt = df.melt('col') + melt = melt.loc[melt['col'] == melt['variable'], 'value'] + melt.reset_index(drop=True) + +Formerly this could be achieved with the dedicated ``DataFrame.lookup`` method +which was deprecated in version 1.2.0. .. _indexing.class: diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 8b18b56929acd..41cf1218261ba 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -209,6 +209,7 @@ Deprecations - Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`) - Deprecated parameter ``dtype`` in :~meth:`Index.copy` on method all index classes. Use the :meth:`Index.astype` method instead for changing dtype(:issue:`35853`) - Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`) +- :meth:`DataFrame.lookup` is deprecated and will be removed in a future version, use :meth:`DataFrame.melt` and :meth:`DataFrame.loc` instead (:issue:`18682`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 56dc5e54e1d59..319003e50999f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3843,10 +3843,15 @@ def _series(self): def lookup(self, row_labels, col_labels) -> np.ndarray: """ Label-based "fancy indexing" function for DataFrame. - Given equal-length arrays of row and column labels, return an array of the values corresponding to each (row, col) pair. + .. deprecated:: 1.2.0 + DataFrame.lookup is deprecated, + use DataFrame.melt and DataFrame.loc instead. + For an example see :meth:`~pandas.DataFrame.lookup` + in the user guide. + Parameters ---------- row_labels : sequence @@ -3859,6 +3864,14 @@ def lookup(self, row_labels, col_labels) -> np.ndarray: numpy.ndarray The found values. """ + msg = ( + "The 'lookup' method is deprecated and will be" + "removed in a future version." + "You can use DataFrame.melt and DataFrame.loc" + "as a substitute." + ) + warnings.warn(msg, FutureWarning, stacklevel=2) + n = len(row_labels) if n != len(col_labels): raise ValueError("Row labels must have same size as column labels") diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index e4549dfb3e68d..b947be705a329 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1340,7 +1340,8 @@ def test_lookup_float(self, float_frame): df = float_frame rows = list(df.index) * len(df.columns) cols = list(df.columns) * len(df.index) - result = df.lookup(rows, cols) + with tm.assert_produces_warning(FutureWarning): + result = df.lookup(rows, cols) expected = np.array([df.loc[r, c] for r, c in zip(rows, cols)]) tm.assert_numpy_array_equal(result, expected) @@ -1349,7 +1350,8 @@ def test_lookup_mixed(self, float_string_frame): df = float_string_frame rows = list(df.index) * len(df.columns) cols = list(df.columns) * len(df.index) - result = df.lookup(rows, cols) + with tm.assert_produces_warning(FutureWarning): + result = df.lookup(rows, cols) expected = np.array( [df.loc[r, c] for r, c in zip(rows, cols)], dtype=np.object_ @@ -1365,7 +1367,8 @@ def test_lookup_bool(self): "mask_c": [False, True, False, True], } ) - df["mask"] = df.lookup(df.index, "mask_" + df["label"]) + with tm.assert_produces_warning(FutureWarning): + df["mask"] = df.lookup(df.index, "mask_" + df["label"]) exp_mask = np.array( [df.loc[r, c] for r, c in zip(df.index, "mask_" + df["label"])] @@ -1376,13 +1379,16 @@ def test_lookup_bool(self): def test_lookup_raises(self, float_frame): with pytest.raises(KeyError, match="'One or more row labels was not found'"): - float_frame.lookup(["xyz"], ["A"]) + with tm.assert_produces_warning(FutureWarning): + float_frame.lookup(["xyz"], ["A"]) with pytest.raises(KeyError, match="'One or more column labels was not found'"): - float_frame.lookup([float_frame.index[0]], ["xyz"]) + with tm.assert_produces_warning(FutureWarning): + float_frame.lookup([float_frame.index[0]], ["xyz"]) with pytest.raises(ValueError, match="same size"): - float_frame.lookup(["a", "b", "c"], ["a"]) + with tm.assert_produces_warning(FutureWarning): + float_frame.lookup(["a", "b", "c"], ["a"]) def test_lookup_requires_unique_axes(self): # GH#33041 raise with a helpful error message @@ -1393,14 +1399,17 @@ def test_lookup_requires_unique_axes(self): # homogeneous-dtype case with pytest.raises(ValueError, match="requires unique index and columns"): - df.lookup(rows, cols) + with tm.assert_produces_warning(FutureWarning): + df.lookup(rows, cols) with pytest.raises(ValueError, match="requires unique index and columns"): - df.T.lookup(cols, rows) + with tm.assert_produces_warning(FutureWarning): + df.T.lookup(cols, rows) # heterogeneous dtype df["B"] = 0 with pytest.raises(ValueError, match="requires unique index and columns"): - df.lookup(rows, cols) + with tm.assert_produces_warning(FutureWarning): + df.lookup(rows, cols) def test_set_value(self, float_frame): for idx in float_frame.index: @@ -2232,3 +2241,12 @@ def test_object_casting_indexing_wraps_datetimelike(): assert blk.dtype == "m8[ns]" # we got the right block val = blk.iget((0, 0)) assert isinstance(val, pd.Timedelta) + + +def test_lookup_deprecated(): + # GH18262 + df = pd.DataFrame( + {"col": ["A", "A", "B", "B"], "A": [80, 23, np.nan, 22], "B": [80, 55, 76, 67]} + ) + with tm.assert_produces_warning(FutureWarning): + df.lookup(df.index, df["col"])