From a7ba96537e0a4f5e58acf02747c4b05cef02b4b2 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 8 Nov 2022 16:17:35 -0800 Subject: [PATCH 1/2] DEPR: Enforce DataFrame(list_with_categorical) deprecation --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/internals/construction.py | 26 -------------- pandas/tests/frame/test_constructors.py | 47 +++++++++---------------- 3 files changed, 18 insertions(+), 56 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 05377c7b12e78..e8ba4a4b1eef4 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -479,6 +479,7 @@ Removal of prior version deprecations/changes - Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`) - Changed behavior of :class:`Index` constructor when given a ``np.ndarray`` with object-dtype containing numeric entries; this now retains object dtype rather than inferring a numeric dtype, consistent with :class:`Series` behavior (:issue:`42870`) - Changed behavior of :meth:`Index.__and__`, :meth:`Index.__or__` and :meth:`Index.__xor__` to behave as logical operations (matching :class:`Series` behavior) instead of aliases for set operations (:issue:`37374`) +- Changed behavior of :class:`DataFrame` constructor when passed a list whose first element is a :class:`Categorical`, this now treats the elements as rows, consistent with behavior for other types (:issue:`38845`) - Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`) - Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`) - Changed behavior of :class:`Timestamp` constructor 
with a ``np.datetime64`` object and a ``tz`` passed to interpret the input as a wall-time as opposed to a UTC time (:issue:`42288`) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index c1745630602ab..97e6c5df29e50 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -10,7 +10,6 @@ Hashable, Sequence, ) -import warnings import numpy as np from numpy import ma @@ -22,7 +21,6 @@ Manager, npt, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import ( construct_1d_arraylike_from_scalar, @@ -52,7 +50,6 @@ common as com, ) from pandas.core.arrays import ( - Categorical, DatetimeArray, ExtensionArray, TimedeltaArray, @@ -472,9 +469,6 @@ def nested_data_to_arrays( if index is None: if isinstance(data[0], ABCSeries): index = _get_names_from_index(data) - elif isinstance(data[0], Categorical): - # GH#38845 hit in test_constructor_categorical - index = default_index(len(data[0])) else: index = default_index(len(data)) @@ -792,26 +786,6 @@ def to_arrays( return arrays, columns return [], ensure_index([]) - elif isinstance(data[0], Categorical): - # GH#38845 deprecate special case - warnings.warn( - "The behavior of DataFrame([categorical, ...]) is deprecated and " - "in a future version will be changed to match the behavior of " - "DataFrame([any_listlike, ...]). " - "To retain the old behavior, pass as a dictionary " - "DataFrame({col: categorical, ..})", - FutureWarning, - stacklevel=find_stack_level(), - ) - if columns is None: - columns = default_index(len(data)) - elif len(columns) > len(data): - raise ValueError("len(columns) > len(data)") - elif len(columns) < len(data): - # doing this here is akin to a pre-emptive reindex - data = data[: len(columns)] - return data, columns - elif isinstance(data, np.ndarray) and data.dtype.names is not None: # e.g. 
recarray columns = Index(list(data.dtype.names)) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 17a76decce3c7..810b7f6eaf2a6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2220,47 +2220,34 @@ def test_constructor_categorical(self): tm.assert_series_equal(df[0], expected) def test_construct_from_1item_list_of_categorical(self): + # pre-2.0 this behaved as DataFrame({0: cat}), in 2.0 we remove + # Categorical special case # ndim != 1 - msg = "will be changed to match the behavior" - with tm.assert_produces_warning(FutureWarning, match=msg): - df = DataFrame([Categorical(list("abc"))]) - expected = DataFrame({0: Series(list("abc"), dtype="category")}) + cat = Categorical(list("abc")) + df = DataFrame([cat]) + expected = DataFrame([cat.astype(object)]) tm.assert_frame_equal(df, expected) def test_construct_from_list_of_categoricals(self): - msg = "will be changed to match the behavior" - with tm.assert_produces_warning(FutureWarning, match=msg): - df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))]) - expected = DataFrame( - { - 0: Series(list("abc"), dtype="category"), - 1: Series(list("abd"), dtype="category"), - }, - columns=[0, 1], - ) + # pre-2.0 this behaved as DataFrame({0: cat}), in 2.0 we remove + # Categorical special case + + df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))]) + expected = DataFrame([["a", "b", "c"], ["a", "b", "d"]]) tm.assert_frame_equal(df, expected) def test_from_nested_listlike_mixed_types(self): + # pre-2.0 this behaved as DataFrame({0: cat}), in 2.0 we remove + # Categorical special case # mixed - msg = "will be changed to match the behavior" - with tm.assert_produces_warning(FutureWarning, match=msg): - df = DataFrame([Categorical(list("abc")), list("def")]) - expected = DataFrame( - {0: Series(list("abc"), dtype="category"), 1: list("def")}, columns=[0, 1] - ) + df = 
DataFrame([Categorical(list("abc")), list("def")]) + expected = DataFrame([["a", "b", "c"], ["d", "e", "f"]]) tm.assert_frame_equal(df, expected) def test_construct_from_listlikes_mismatched_lengths(self): - # invalid (shape) - msg = "|".join( - [ - r"Length of values \(6\) does not match length of index \(3\)", - ] - ) - msg2 = "will be changed to match the behavior" - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=msg2): - DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))]) + df = DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))]) + expected = DataFrame([list("abc"), list("abdefg")]) + tm.assert_frame_equal(df, expected) def test_constructor_categorical_series(self): From 142182ced6e0d183a350e49ddd90657d400fd09a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 9 Nov 2022 09:01:58 -0800 Subject: [PATCH 2/2] Update doc/source/whatsnew/v2.0.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index e8ba4a4b1eef4..4e840513bbee2 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -479,7 +479,7 @@ Removal of prior version deprecations/changes - Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``, when the data cannot be cast losslessly, the floating point dtype is retained, matching :class:`Series` behavior (:issue:`41170`) - Changed behavior of :class:`Index` constructor when given a ``np.ndarray`` with object-dtype containing numeric entries; this now retains object dtype rather than inferring a numeric dtype, consistent with :class:`Series` behavior (:issue:`42870`) - Changed behavior of :meth:`Index.__and__`, :meth:`Index.__or__` and :meth:`Index.__xor__` to behave as logical operations (matching 
:class:`Series` behavior) instead of aliases for set operations (:issue:`37374`) -- Changed behavior of :class:`DataFrame` constructor when passed a list whose first element is a :class:`Categorical`, this now treats the elements as rows, consistent with behavior for other types (:issue:`38845`) +- Changed behavior of :class:`DataFrame` constructor when passed a list whose first element is a :class:`Categorical`; this now treats the elements as rows, casting to ``object`` dtype, consistent with behavior for other types (:issue:`38845`) - Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`) - Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`) - Changed behavior of :class:`Timestamp` constructor with a ``np.datetime64`` object and a ``tz`` passed to interpret the input as a wall-time as opposed to a UTC time (:issue:`42288`)