diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1eb22436204a8..a80f90924c3a5 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -648,6 +648,7 @@ Deprecations - Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`) - Deprecated behavior of :meth:`DatetimeIndex.union` with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`) - Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`) +- Deprecated special treatment of lists whose first element is a :class:`Categorical` in the :class:`DataFrame` constructor; pass as ``pd.DataFrame({col: categorical, ...})`` instead (:issue:`38845`) - Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 06d30d6ed72e8..ed4b9797ca702 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -11,6 +11,7 @@ Hashable, Sequence, ) +import warnings import numpy as np import numpy.ma as ma @@ -772,6 +773,16 @@ def to_arrays( return [], ensure_index([]) elif isinstance(data[0], Categorical): + # GH#38845 deprecate special case + warnings.warn( + "The behavior of DataFrame([categorical, ...]) is deprecated and " + "in a future version will be changed to match the behavior of " + "DataFrame([any_listlike, ...]). 
" + "To retain the old behavior, pass as a dictionary " + "DataFrame({col: categorical, ..})", + FutureWarning, + stacklevel=4, + ) if columns is None: columns = ibase.default_index(len(data)) return data, columns diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 10d55053d5bc6..33e7c1643d18d 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4571,7 +4571,7 @@ def read( df = DataFrame(values, columns=cols_, index=index_) else: # Categorical - df = DataFrame([values], columns=cols_, index=index_) + df = DataFrame._from_arrays([values], columns=cols_, index=index_) assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype) frames.append(df) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 6e9991ff17ac3..179d1bca7223f 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2089,12 +2089,16 @@ def test_constructor_categorical(self): def test_construct_from_1item_list_of_categorical(self): # ndim != 1 - df = DataFrame([Categorical(list("abc"))]) + msg = "will be changed to match the behavior" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([Categorical(list("abc"))]) expected = DataFrame({0: Series(list("abc"), dtype="category")}) tm.assert_frame_equal(df, expected) def test_construct_from_list_of_categoricals(self): - df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))]) + msg = "will be changed to match the behavior" + with tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))]) expected = DataFrame( { 0: Series(list("abc"), dtype="category"), @@ -2106,7 +2110,9 @@ def test_construct_from_list_of_categoricals(self): def test_from_nested_listlike_mixed_types(self): # mixed - df = DataFrame([Categorical(list("abc")), list("def")]) + msg = "will be changed to match the behavior" + with 
tm.assert_produces_warning(FutureWarning, match=msg): + df = DataFrame([Categorical(list("abc")), list("def")]) expected = DataFrame( {0: Series(list("abc"), dtype="category"), 1: list("def")}, columns=[0, 1] ) @@ -2120,8 +2126,10 @@ def test_construct_from_listlikes_mismatched_lengths(self): "Passed arrays should have the same length as the rows Index", ] ) + msg2 = "will be changed to match the behavior" with pytest.raises(ValueError, match=msg): - DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))]) + with tm.assert_produces_warning(FutureWarning, match=msg2): + DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))]) def test_constructor_categorical_series(self):