diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index c0ca5b2320338..2ac20e97409e7 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -93,6 +93,7 @@ Other API changes
 Deprecations
 ~~~~~~~~~~~~
 - Deprecating pinning ``group.name`` to each group in :meth:`SeriesGroupBy.aggregate` aggregations; if your operation requires utilizing the groupby keys, iterate over the groupby object instead (:issue:`41090`)
+- Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
 - Deprecated accepting slices in :meth:`DataFrame.take`, call ``obj[slicer]`` or pass a sequence of integers instead (:issue:`51539`)
 -
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e6af875ab1c23..aa3b10df742a2 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2109,6 +2109,9 @@ def from_records(
         ----------
         data : structured ndarray, sequence of tuples or dicts, or DataFrame
             Structured input data.
+
+            .. deprecated:: 2.1.0
+                Passing a DataFrame is deprecated.
         index : str, list of fields, array-like
             Field of array to use as the index, alternately a specific set of
             input labels to use.
@@ -2171,6 +2174,24 @@ def from_records(
         2      1     c
         3      0     d
         """
+        if isinstance(data, DataFrame):
+            warnings.warn(
+                "Passing a DataFrame to DataFrame.from_records is deprecated. Use "
+                "set_index and/or drop to modify the DataFrame instead.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
+            if columns is not None:
+                if is_scalar(columns):
+                    columns = [columns]
+                data = data[columns]
+            if index is not None:
+                data = data.set_index(index)
+            if exclude is not None:
+                data = data.drop(columns=exclude)
+            # Shallow copy: never hand the caller's own DataFrame object back.
+            return data.copy(deep=False)
+
         result_index = None
 
         # Make a copy of the input columns so we can modify it
@@ -2238,7 +2259,7 @@ def maybe_reorder(
                     arrays, arr_columns, columns, index
                 )
 
-        elif isinstance(data, (np.ndarray, DataFrame)):
+        elif isinstance(data, np.ndarray):
             arrays, columns = to_arrays(data, columns)
             arr_columns = columns
         else:
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 005c166bb1f2a..ce14145e18f06 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -769,19 +769,6 @@ def to_arrays(
     -----
     Ensures that len(result_arrays) == len(result_index).
     """
-    if isinstance(data, ABCDataFrame):
-        # see test_from_records_with_index_data, test_from_records_bad_index_column
-        if columns is not None:
-            arrays = [
-                data._ixs(i, axis=1)._values
-                for i, col in enumerate(data.columns)
-                if col in columns
-            ]
-        else:
-            columns = data.columns
-            arrays = [data._ixs(i, axis=1)._values for i in range(len(columns))]
-
-        return arrays, columns
 
     if not len(data):
         if isinstance(data, np.ndarray):
diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py
index 18bf633d60186..0ba4f6d249e6d 100644
--- a/pandas/tests/frame/constructors/test_from_records.py
+++ b/pandas/tests/frame/constructors/test_from_records.py
@@ -25,7 +25,9 @@ def test_from_records_dt64tz_frame(self):
         # GH#51162 don't lose tz when calling from_records with DataFrame input
         dti = date_range("2016-01-01", periods=10, tz="US/Pacific")
         df = DataFrame({i: dti for i in range(4)})
-        res = DataFrame.from_records(df)
+        with tm.assert_produces_warning(FutureWarning):
+            res = DataFrame.from_records(df)
         tm.assert_frame_equal(res, df)
+        assert res is not df
 
     def test_from_records_with_datetimes(self):
@@ -177,29 +179,34 @@ def test_from_records_with_index_data(self):
         df = DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"])
 
         data = np.random.randn(10)
-        df1 = DataFrame.from_records(df, index=data)
+        with tm.assert_produces_warning(FutureWarning):
+            df1 = DataFrame.from_records(df, index=data)
         tm.assert_index_equal(df1.index, Index(data))
 
     def test_from_records_bad_index_column(self):
         df = DataFrame(np.random.randn(10, 3), columns=["A", "B", "C"])
 
         # should pass
-        df1 = DataFrame.from_records(df, index=["C"])
+        with tm.assert_produces_warning(FutureWarning):
+            df1 = DataFrame.from_records(df, index=["C"])
         tm.assert_index_equal(df1.index, Index(df.C))
 
-        df1 = DataFrame.from_records(df, index="C")
+        with tm.assert_produces_warning(FutureWarning):
+            df1 = DataFrame.from_records(df, index="C")
         tm.assert_index_equal(df1.index, Index(df.C))
 
         # should fail
         msg = "|".join(
             [
-                r"Length of values \(10\) does not match length of index \(1\)",
+                r"'None of \[2\] are in the columns'",
             ]
         )
-        with pytest.raises(ValueError, match=msg):
-            DataFrame.from_records(df, index=[2])
-        with pytest.raises(KeyError, match=r"^2$"):
-            DataFrame.from_records(df, index=2)
+        with pytest.raises(KeyError, match=msg):
+            with tm.assert_produces_warning(FutureWarning):
+                DataFrame.from_records(df, index=[2])
+        with pytest.raises(KeyError, match=msg):
+            with tm.assert_produces_warning(FutureWarning):
+                DataFrame.from_records(df, index=2)
 
     def test_from_records_non_tuple(self):
         class Record: