Skip to content

DEPR: Enforce DataFrame(list_with_categorical) deprecation #49592

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Nov 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,7 @@ Removal of prior version deprecations/changes
- Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``; when the data cannot be cast losslessly, the floating-point dtype is retained, matching :class:`Series` behavior (:issue:`41170`)
- Changed behavior of :class:`Index` constructor when given a ``np.ndarray`` with object-dtype containing numeric entries; this now retains object dtype rather than inferring a numeric dtype, consistent with :class:`Series` behavior (:issue:`42870`)
- Changed behavior of :meth:`Index.__and__`, :meth:`Index.__or__` and :meth:`Index.__xor__` to behave as logical operations (matching :class:`Series` behavior) instead of aliases for set operations (:issue:`37374`)
- Changed behavior of :class:`DataFrame` constructor when passed a list whose first element is a :class:`Categorical`; this now treats the elements as rows, casting to ``object`` dtype, consistent with behavior for other types (:issue:`38845`)
- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
- Changed behavior of :class:`Series` constructor; it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`)
- Changed behavior of :class:`Timestamp` constructor with a ``np.datetime64`` object and a ``tz`` passed to interpret the input as a wall-time as opposed to a UTC time (:issue:`42288`)
Expand Down
30 changes: 1 addition & 29 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
Hashable,
Sequence,
)
import warnings

import numpy as np
from numpy import ma
Expand All @@ -22,7 +21,6 @@
Manager,
npt,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import (
construct_1d_arraylike_from_scalar,
Expand Down Expand Up @@ -51,10 +49,7 @@
algorithms,
common as com,
)
from pandas.core.arrays import (
Categorical,
ExtensionArray,
)
from pandas.core.arrays import ExtensionArray
from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
extract_array,
Expand Down Expand Up @@ -476,9 +471,6 @@ def nested_data_to_arrays(
if index is None:
if isinstance(data[0], ABCSeries):
index = _get_names_from_index(data)
elif isinstance(data[0], Categorical):
# GH#38845 hit in test_constructor_categorical
index = default_index(len(data[0]))
else:
index = default_index(len(data))

Expand Down Expand Up @@ -795,26 +787,6 @@ def to_arrays(
return arrays, columns
return [], ensure_index([])

elif isinstance(data[0], Categorical):
# GH#38845 deprecate special case
warnings.warn(
"The behavior of DataFrame([categorical, ...]) is deprecated and "
"in a future version will be changed to match the behavior of "
"DataFrame([any_listlike, ...]). "
"To retain the old behavior, pass as a dictionary "
"DataFrame({col: categorical, ..})",
FutureWarning,
stacklevel=find_stack_level(),
)
if columns is None:
columns = default_index(len(data))
elif len(columns) > len(data):
raise ValueError("len(columns) > len(data)")
elif len(columns) < len(data):
# doing this here is akin to a pre-emptive reindex
data = data[: len(columns)]
return data, columns

elif isinstance(data, np.ndarray) and data.dtype.names is not None:
# e.g. recarray
columns = Index(list(data.dtype.names))
Expand Down
47 changes: 17 additions & 30 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2220,47 +2220,34 @@ def test_constructor_categorical(self):
tm.assert_series_equal(df[0], expected)

def test_construct_from_1item_list_of_categorical(self):
# pre-2.0 this behaved as DataFrame({0: cat}); in 2.0 we removed the
# Categorical special case
# ndim != 1
msg = "will be changed to match the behavior"
with tm.assert_produces_warning(FutureWarning, match=msg):
df = DataFrame([Categorical(list("abc"))])
expected = DataFrame({0: Series(list("abc"), dtype="category")})
cat = Categorical(list("abc"))
df = DataFrame([cat])
expected = DataFrame([cat.astype(object)])
tm.assert_frame_equal(df, expected)

def test_construct_from_list_of_categoricals(self):
msg = "will be changed to match the behavior"
with tm.assert_produces_warning(FutureWarning, match=msg):
df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))])
expected = DataFrame(
{
0: Series(list("abc"), dtype="category"),
1: Series(list("abd"), dtype="category"),
},
columns=[0, 1],
)
# pre-2.0 this behaved as DataFrame({0: cat0, 1: cat1}); in 2.0 we removed
# the Categorical special case

df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))])
expected = DataFrame([["a", "b", "c"], ["a", "b", "d"]])
tm.assert_frame_equal(df, expected)

def test_from_nested_listlike_mixed_types(self):
# pre-2.0 this behaved as DataFrame({0: cat, 1: list("def")}); in 2.0 we
# removed the Categorical special case
# mixed
msg = "will be changed to match the behavior"
with tm.assert_produces_warning(FutureWarning, match=msg):
df = DataFrame([Categorical(list("abc")), list("def")])
expected = DataFrame(
{0: Series(list("abc"), dtype="category"), 1: list("def")}, columns=[0, 1]
)
df = DataFrame([Categorical(list("abc")), list("def")])
expected = DataFrame([["a", "b", "c"], ["d", "e", "f"]])
tm.assert_frame_equal(df, expected)

def test_construct_from_listlikes_mismatched_lengths(self):
# invalid (shape)
msg = "|".join(
[
r"Length of values \(6\) does not match length of index \(3\)",
]
)
msg2 = "will be changed to match the behavior"
with pytest.raises(ValueError, match=msg):
with tm.assert_produces_warning(FutureWarning, match=msg2):
DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))])
df = DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))])
expected = DataFrame([list("abc"), list("abdefg")])
tm.assert_frame_equal(df, expected)

def test_constructor_categorical_series(self):

Expand Down