diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 07a4e4af3dbe7..9ecddf1acaa32 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -482,6 +482,7 @@ Removal of prior version deprecations/changes - Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`) - Changed behavior of :class:`Index` constructor when given a ``np.ndarray`` with object-dtype containing numeric entries; this now retains object dtype rather than inferring a numeric dtype, consistent with :class:`Series` behavior (:issue:`42870`) - Changed behavior of :meth:`Index.__and__`, :meth:`Index.__or__` and :meth:`Index.__xor__` to behave as logical operations (matching :class:`Series` behavior) instead of aliases for set operations (:issue:`37374`) +- Changed behavior of :class:`DataFrame` constructor when passed a list whose first element is a :class:`Categorical`; this now treats the elements as rows, casting to ``object`` dtype, consistent with the behavior for other types (:issue:`38845`) - Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`) - Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`) - Changed behavior of :class:`Timestamp` constructor with a ``np.datetime64`` object and a ``tz`` passed to interpret the input as a wall-time as opposed to a UTC time (:issue:`42288`) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 761a641ccb2f7..8e186b1f4a034 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -10,7 +10,6 @@ Hashable, Sequence, ) 
-import warnings import numpy as np from numpy import ma @@ -22,7 +21,6 @@ Manager, npt, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, @@ -51,10 +49,7 @@ algorithms, common as com, ) -from pandas.core.arrays import ( - Categorical, - ExtensionArray, -) +from pandas.core.arrays import ExtensionArray from pandas.core.construction import ( ensure_wrapped_if_datetimelike, extract_array, @@ -476,9 +471,6 @@ def nested_data_to_arrays( if index is None: if isinstance(data[0], ABCSeries): index = _get_names_from_index(data) - elif isinstance(data[0], Categorical): - # GH#38845 hit in test_constructor_categorical - index = default_index(len(data[0])) else: index = default_index(len(data)) @@ -795,26 +787,6 @@ def to_arrays( return arrays, columns return [], ensure_index([]) - elif isinstance(data[0], Categorical): - # GH#38845 deprecate special case - warnings.warn( - "The behavior of DataFrame([categorical, ...]) is deprecated and " - "in a future version will be changed to match the behavior of " - "DataFrame([any_listlike, ...]). " - "To retain the old behavior, pass as a dictionary " - "DataFrame({col: categorical, ..})", - FutureWarning, - stacklevel=find_stack_level(), - ) - if columns is None: - columns = default_index(len(data)) - elif len(columns) > len(data): - raise ValueError("len(columns) > len(data)") - elif len(columns) < len(data): - # doing this here is akin to a pre-emptive reindex - data = data[: len(columns)] - return data, columns - elif isinstance(data, np.ndarray) and data.dtype.names is not None: # e.g. 
recarray columns = Index(list(data.dtype.names)) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 17a76decce3c7..810b7f6eaf2a6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2220,47 +2220,34 @@ def test_constructor_categorical(self): tm.assert_series_equal(df[0], expected) def test_construct_from_1item_list_of_categorical(self): + # pre-2.0 this behaved as DataFrame({0: cat}), in 2.0 we remove + # Categorical special case # ndim != 1 - msg = "will be changed to match the behavior" - with tm.assert_produces_warning(FutureWarning, match=msg): - df = DataFrame([Categorical(list("abc"))]) - expected = DataFrame({0: Series(list("abc"), dtype="category")}) + cat = Categorical(list("abc")) + df = DataFrame([cat]) + expected = DataFrame([cat.astype(object)]) tm.assert_frame_equal(df, expected) def test_construct_from_list_of_categoricals(self): - msg = "will be changed to match the behavior" - with tm.assert_produces_warning(FutureWarning, match=msg): - df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))]) - expected = DataFrame( - { - 0: Series(list("abc"), dtype="category"), - 1: Series(list("abd"), dtype="category"), - }, - columns=[0, 1], - ) + # pre-2.0 this behaved as DataFrame({0: cat}), in 2.0 we remove + # Categorical special case + + df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))]) + expected = DataFrame([["a", "b", "c"], ["a", "b", "d"]]) tm.assert_frame_equal(df, expected) def test_from_nested_listlike_mixed_types(self): + # pre-2.0 this behaved as DataFrame({0: cat}), in 2.0 we remove + # Categorical special case # mixed - msg = "will be changed to match the behavior" - with tm.assert_produces_warning(FutureWarning, match=msg): - df = DataFrame([Categorical(list("abc")), list("def")]) - expected = DataFrame( - {0: Series(list("abc"), dtype="category"), 1: list("def")}, columns=[0, 1] - ) + df = 
DataFrame([Categorical(list("abc")), list("def")]) + expected = DataFrame([["a", "b", "c"], ["d", "e", "f"]]) tm.assert_frame_equal(df, expected) def test_construct_from_listlikes_mismatched_lengths(self): - # invalid (shape) - msg = "|".join( - [ - r"Length of values \(6\) does not match length of index \(3\)", - ] - ) - msg2 = "will be changed to match the behavior" - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=msg2): - DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))]) + df = DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))]) + expected = DataFrame([list("abc"), list("abdefg")]) + tm.assert_frame_equal(df, expected) def test_constructor_categorical_series(self):