Skip to content

Commit b92267b

Browse files
DEPR: Enforce DataFrame(list_with_categorical) deprecation (#49592)
* DEPR: Enforce DataFrame(list_with_categorical) deprecation
* Update doc/source/whatsnew/v2.0.0.rst

Co-authored-by: Matthew Roeschke <[email protected]>
Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 8f869f3 commit b92267b

File tree

3 files changed

+19
-59
lines changed

3 files changed

+19
-59
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,7 @@ Removal of prior version deprecations/changes
482482
- Changed behavior of :class:`DataFrame` constructor given floating-point ``data`` and an integer ``dtype``; when the data cannot be cast losslessly, the floating-point dtype is retained, matching :class:`Series` behavior (:issue:`41170`)
483483
- Changed behavior of :class:`Index` constructor when given a ``np.ndarray`` with object-dtype containing numeric entries; this now retains object dtype rather than inferring a numeric dtype, consistent with :class:`Series` behavior (:issue:`42870`)
484484
- Changed behavior of :meth:`Index.__and__`, :meth:`Index.__or__` and :meth:`Index.__xor__` to behave as logical operations (matching :class:`Series` behavior) instead of aliases for set operations (:issue:`37374`)
485+
- Changed behavior of :class:`DataFrame` constructor when passed a list whose first element is a :class:`Categorical`; this now treats the elements as rows, casting to ``object`` dtype, consistent with the behavior for other types (:issue:`38845`)
485486
- Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`)
486487
- Changed the behavior of :class:`Series` constructor; it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`)
487488
- Changed behavior of :class:`Timestamp` constructor with a ``np.datetime64`` object and a ``tz`` passed to interpret the input as a wall-time as opposed to a UTC time (:issue:`42288`)

pandas/core/internals/construction.py

+1-29
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
Hashable,
1111
Sequence,
1212
)
13-
import warnings
1413

1514
import numpy as np
1615
from numpy import ma
@@ -22,7 +21,6 @@
2221
Manager,
2322
npt,
2423
)
25-
from pandas.util._exceptions import find_stack_level
2624

2725
from pandas.core.dtypes.cast import (
2826
construct_1d_arraylike_from_scalar,
@@ -51,10 +49,7 @@
5149
algorithms,
5250
common as com,
5351
)
54-
from pandas.core.arrays import (
55-
Categorical,
56-
ExtensionArray,
57-
)
52+
from pandas.core.arrays import ExtensionArray
5853
from pandas.core.construction import (
5954
ensure_wrapped_if_datetimelike,
6055
extract_array,
@@ -476,9 +471,6 @@ def nested_data_to_arrays(
476471
if index is None:
477472
if isinstance(data[0], ABCSeries):
478473
index = _get_names_from_index(data)
479-
elif isinstance(data[0], Categorical):
480-
# GH#38845 hit in test_constructor_categorical
481-
index = default_index(len(data[0]))
482474
else:
483475
index = default_index(len(data))
484476

@@ -795,26 +787,6 @@ def to_arrays(
795787
return arrays, columns
796788
return [], ensure_index([])
797789

798-
elif isinstance(data[0], Categorical):
799-
# GH#38845 deprecate special case
800-
warnings.warn(
801-
"The behavior of DataFrame([categorical, ...]) is deprecated and "
802-
"in a future version will be changed to match the behavior of "
803-
"DataFrame([any_listlike, ...]). "
804-
"To retain the old behavior, pass as a dictionary "
805-
"DataFrame({col: categorical, ..})",
806-
FutureWarning,
807-
stacklevel=find_stack_level(),
808-
)
809-
if columns is None:
810-
columns = default_index(len(data))
811-
elif len(columns) > len(data):
812-
raise ValueError("len(columns) > len(data)")
813-
elif len(columns) < len(data):
814-
# doing this here is akin to a pre-emptive reindex
815-
data = data[: len(columns)]
816-
return data, columns
817-
818790
elif isinstance(data, np.ndarray) and data.dtype.names is not None:
819791
# e.g. recarray
820792
columns = Index(list(data.dtype.names))

pandas/tests/frame/test_constructors.py

+17-30
Original file line numberDiff line numberDiff line change
@@ -2220,47 +2220,34 @@ def test_constructor_categorical(self):
22202220
tm.assert_series_equal(df[0], expected)
22212221

22222222
def test_construct_from_1item_list_of_categorical(self):
2223+
# pre-2.0 this behaved as DataFrame({0: cat}), in 2.0 we remove
2224+
# Categorical special case
22232225
# ndim != 1
2224-
msg = "will be changed to match the behavior"
2225-
with tm.assert_produces_warning(FutureWarning, match=msg):
2226-
df = DataFrame([Categorical(list("abc"))])
2227-
expected = DataFrame({0: Series(list("abc"), dtype="category")})
2226+
cat = Categorical(list("abc"))
2227+
df = DataFrame([cat])
2228+
expected = DataFrame([cat.astype(object)])
22282229
tm.assert_frame_equal(df, expected)
22292230

22302231
def test_construct_from_list_of_categoricals(self):
2231-
msg = "will be changed to match the behavior"
2232-
with tm.assert_produces_warning(FutureWarning, match=msg):
2233-
df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))])
2234-
expected = DataFrame(
2235-
{
2236-
0: Series(list("abc"), dtype="category"),
2237-
1: Series(list("abd"), dtype="category"),
2238-
},
2239-
columns=[0, 1],
2240-
)
2232+
# pre-2.0 this behaved as DataFrame({0: cat0, 1: cat1}), in 2.0 we remove
2233+
# Categorical special case
2234+
2235+
df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))])
2236+
expected = DataFrame([["a", "b", "c"], ["a", "b", "d"]])
22412237
tm.assert_frame_equal(df, expected)
22422238

22432239
def test_from_nested_listlike_mixed_types(self):
2240+
# pre-2.0 this behaved as DataFrame({0: cat, 1: list("def")}), in 2.0 we remove
2241+
# Categorical special case
22442242
# mixed
2245-
msg = "will be changed to match the behavior"
2246-
with tm.assert_produces_warning(FutureWarning, match=msg):
2247-
df = DataFrame([Categorical(list("abc")), list("def")])
2248-
expected = DataFrame(
2249-
{0: Series(list("abc"), dtype="category"), 1: list("def")}, columns=[0, 1]
2250-
)
2243+
df = DataFrame([Categorical(list("abc")), list("def")])
2244+
expected = DataFrame([["a", "b", "c"], ["d", "e", "f"]])
22512245
tm.assert_frame_equal(df, expected)
22522246

22532247
def test_construct_from_listlikes_mismatched_lengths(self):
2254-
# invalid (shape)
2255-
msg = "|".join(
2256-
[
2257-
r"Length of values \(6\) does not match length of index \(3\)",
2258-
]
2259-
)
2260-
msg2 = "will be changed to match the behavior"
2261-
with pytest.raises(ValueError, match=msg):
2262-
with tm.assert_produces_warning(FutureWarning, match=msg2):
2263-
DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))])
2248+
df = DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))])
2249+
expected = DataFrame([list("abc"), list("abdefg")])
2250+
tm.assert_frame_equal(df, expected)
22642251

22652252
def test_constructor_categorical_series(self):
22662253

0 commit comments

Comments
 (0)