Skip to content

Commit a648fbf

Browse files
jbrockmendel authored and luckyvs1 committed
BUG: DataFrame(ndarray, dtype=categoricaldtype) (pandas-dev#38857)
* BUG: DataFrame(ndarray, dtype=categoricaldtype)
* whatsnew
* GH ref
* elif->if
1 parent 42e1419 commit a648fbf

File tree

4 files changed

+29
-22
lines changed

4 files changed

+29
-22
lines changed

doc/source/whatsnew/v1.3.0.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,8 @@ Categorical
184184
^^^^^^^^^^^
185185
- Bug in :class:`CategoricalIndex` incorrectly failing to raise ``TypeError`` when scalar data is passed (:issue:`38614`)
186186
- Bug in ``CategoricalIndex.reindex`` failed when ``Index`` passed with elements all in category (:issue:`28690`)
187-
- Bug where construcing a :class:`Categorical` from an object-dtype array of ``date`` objects did not round-trip correctly with ``astype`` (:issue:`38552`)
188-
187+
- Bug where constructing a :class:`Categorical` from an object-dtype array of ``date`` objects did not round-trip correctly with ``astype`` (:issue:`38552`)
188+
- Bug in constructing a :class:`DataFrame` from an ``ndarray`` and a :class:`CategoricalDtype` (:issue:`38857`)
189189

190190
Datetimelike
191191
^^^^^^^^^^^^

pandas/core/internals/construction.py

+5-17
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
maybe_upcast,
2222
)
2323
from pandas.core.dtypes.common import (
24-
is_categorical_dtype,
2524
is_datetime64tz_dtype,
2625
is_dtype_equal,
2726
is_extension_array_dtype,
@@ -160,21 +159,7 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
160159
if not len(values) and columns is not None and len(columns):
161160
values = np.empty((0, 1), dtype=object)
162161

163-
# we could have a categorical type passed or coerced to 'category'
164-
# recast this to an arrays_to_mgr
165-
if is_categorical_dtype(getattr(values, "dtype", None)) or is_categorical_dtype(
166-
dtype
167-
):
168-
169-
if not hasattr(values, "dtype"):
170-
values = _prep_ndarray(values, copy=copy)
171-
values = values.ravel()
172-
elif copy:
173-
values = values.copy()
174-
175-
index, columns = _get_axes(len(values), 1, index, columns)
176-
return arrays_to_mgr([values], columns, index, columns, dtype=dtype)
177-
elif is_extension_array_dtype(values) or is_extension_array_dtype(dtype):
162+
if is_extension_array_dtype(values) or is_extension_array_dtype(dtype):
178163
# GH#19157
179164

180165
if isinstance(values, np.ndarray) and values.ndim > 1:
@@ -308,6 +293,7 @@ def nested_data_to_arrays(
308293
if isinstance(data[0], ABCSeries):
309294
index = _get_names_from_index(data)
310295
elif isinstance(data[0], Categorical):
296+
# GH#38845 hit in test_constructor_categorical
311297
index = ibase.default_index(len(data[0]))
312298
else:
313299
index = ibase.default_index(len(data))
@@ -486,7 +472,9 @@ def _get_names_from_index(data):
486472
return index
487473

488474

489-
def _get_axes(N, K, index, columns) -> Tuple[Index, Index]:
475+
def _get_axes(
476+
N: int, K: int, index: Optional[Index], columns: Optional[Index]
477+
) -> Tuple[Index, Index]:
490478
# helper to create the axes as indexes
491479
# return axes or defaults
492480

pandas/tests/frame/test_constructors.py

+16
Original file line numberDiff line numberDiff line change
@@ -1890,6 +1890,16 @@ def test_constructor_lists_to_object_dtype(self):
18901890
assert d["a"].dtype == np.object_
18911891
assert not d["a"][1]
18921892

1893+
def test_constructor_ndarray_categorical_dtype(self):
1894+
cat = Categorical(["A", "B", "C"])
1895+
arr = np.array(cat).reshape(-1, 1)
1896+
arr = np.broadcast_to(arr, (3, 4))
1897+
1898+
result = DataFrame(arr, dtype=cat.dtype)
1899+
1900+
expected = DataFrame({0: cat, 1: cat, 2: cat, 3: cat})
1901+
tm.assert_frame_equal(result, expected)
1902+
18931903
def test_constructor_categorical(self):
18941904

18951905
# GH8626
@@ -1913,11 +1923,13 @@ def test_constructor_categorical(self):
19131923
expected = Series(list("abc"), dtype="category", name=0)
19141924
tm.assert_series_equal(df[0], expected)
19151925

1926+
def test_construct_from_1item_list_of_categorical(self):
19161927
# ndim != 1
19171928
df = DataFrame([Categorical(list("abc"))])
19181929
expected = DataFrame({0: Series(list("abc"), dtype="category")})
19191930
tm.assert_frame_equal(df, expected)
19201931

1932+
def test_construct_from_list_of_categoricals(self):
19211933
df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))])
19221934
expected = DataFrame(
19231935
{
@@ -1928,18 +1940,22 @@ def test_constructor_categorical(self):
19281940
)
19291941
tm.assert_frame_equal(df, expected)
19301942

1943+
def test_from_nested_listlike_mixed_types(self):
19311944
# mixed
19321945
df = DataFrame([Categorical(list("abc")), list("def")])
19331946
expected = DataFrame(
19341947
{0: Series(list("abc"), dtype="category"), 1: list("def")}, columns=[0, 1]
19351948
)
19361949
tm.assert_frame_equal(df, expected)
19371950

1951+
def test_construct_from_listlikes_mismatched_lengths(self):
19381952
# invalid (shape)
19391953
msg = r"Shape of passed values is \(6, 2\), indices imply \(3, 2\)"
19401954
with pytest.raises(ValueError, match=msg):
19411955
DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))])
19421956

1957+
def test_categorical_1d_only(self):
1958+
# TODO: belongs in Categorical tests
19431959
# ndim > 1
19441960
msg = "> 1 ndim Categorical are not supported at this time"
19451961
with pytest.raises(NotImplementedError, match=msg):

pandas/tests/series/test_constructors.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -326,13 +326,16 @@ def test_constructor_categorical(self):
326326
expected = Series([1, 2, 3], dtype="int64")
327327
tm.assert_series_equal(result, expected)
328328

329+
def test_construct_from_categorical_with_dtype(self):
329330
# GH12574
330331
cat = Series(Categorical([1, 2, 3]), dtype="category")
331332
assert is_categorical_dtype(cat)
332333
assert is_categorical_dtype(cat.dtype)
333-
s = Series([1, 2, 3], dtype="category")
334-
assert is_categorical_dtype(s)
335-
assert is_categorical_dtype(s.dtype)
334+
335+
def test_construct_intlist_values_category_dtype(self):
336+
ser = Series([1, 2, 3], dtype="category")
337+
assert is_categorical_dtype(ser)
338+
assert is_categorical_dtype(ser.dtype)
336339

337340
def test_constructor_categorical_with_coercion(self):
338341
factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])

0 commit comments

Comments
 (0)