diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 049c4fe653107..9981b2c4b395e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -184,8 +184,8 @@ Categorical ^^^^^^^^^^^ - Bug in :class:`CategoricalIndex` incorrectly failing to raise ``TypeError`` when scalar data is passed (:issue:`38614`) - Bug in ``CategoricalIndex.reindex`` failed when ``Index`` passed with elements all in category (:issue:`28690`) -- Bug where construcing a :class:`Categorical` from an object-dtype array of ``date`` objects did not round-trip correctly with ``astype`` (:issue:`38552`) - +- Bug where constructing a :class:`Categorical` from an object-dtype array of ``date`` objects did not round-trip correctly with ``astype`` (:issue:`38552`) +- Bug in constructing a :class:`DataFrame` from an ``ndarray`` and a :class:`CategoricalDtype` (:issue:`38857`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index d59cfc436f13d..f1cd221bae15c 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -21,7 +21,6 @@ maybe_upcast, ) from pandas.core.dtypes.common import ( - is_categorical_dtype, is_datetime64tz_dtype, is_dtype_equal, is_extension_array_dtype, @@ -160,21 +159,7 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool): if not len(values) and columns is not None and len(columns): values = np.empty((0, 1), dtype=object) - # we could have a categorical type passed or coerced to 'category' - # recast this to an arrays_to_mgr - if is_categorical_dtype(getattr(values, "dtype", None)) or is_categorical_dtype( - dtype - ): - - if not hasattr(values, "dtype"): - values = _prep_ndarray(values, copy=copy) - values = values.ravel() - elif copy: - values = values.copy() - - index, columns = _get_axes(len(values), 1, index, columns) - return arrays_to_mgr([values], columns, index, columns, dtype=dtype) - elif is_extension_array_dtype(values) or is_extension_array_dtype(dtype): + if is_extension_array_dtype(values) or is_extension_array_dtype(dtype): # GH#19157 if isinstance(values, np.ndarray) and values.ndim > 1: @@ -308,6 +293,7 @@ def nested_data_to_arrays( if isinstance(data[0], ABCSeries): index = _get_names_from_index(data) elif isinstance(data[0], Categorical): + # GH#38845 hit in test_constructor_categorical index = ibase.default_index(len(data[0])) else: index = ibase.default_index(len(data)) @@ -486,7 +472,9 @@ def _get_names_from_index(data): return index -def _get_axes(N, K, index, columns) -> Tuple[Index, Index]: +def _get_axes( + N: int, K: int, index: Optional[Index], columns: Optional[Index] +) -> Tuple[Index, Index]: # helper to create the axes as indexes # return axes or defaults diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 4d57b43df2387..f408a3ddde04e 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1890,6 +1890,16 @@ def test_constructor_lists_to_object_dtype(self): assert d["a"].dtype == np.object_ assert not d["a"][1] + def test_constructor_ndarray_categorical_dtype(self): + cat = Categorical(["A", "B", "C"]) + arr = np.array(cat).reshape(-1, 1) + arr = np.broadcast_to(arr, (3, 4)) + + result = DataFrame(arr, dtype=cat.dtype) + + expected = DataFrame({0: cat, 1: cat, 2: cat, 3: cat}) + tm.assert_frame_equal(result, expected) + def test_constructor_categorical(self): # GH8626 @@ -1913,11 +1923,13 @@ def test_constructor_categorical(self): expected = Series(list("abc"), dtype="category", name=0) tm.assert_series_equal(df[0], expected) + def test_construct_from_1item_list_of_categorical(self): # ndim != 1 df = DataFrame([Categorical(list("abc"))]) expected = DataFrame({0: Series(list("abc"), dtype="category")}) tm.assert_frame_equal(df, expected) + def test_construct_from_list_of_categoricals(self): df = DataFrame([Categorical(list("abc")), Categorical(list("abd"))]) expected = DataFrame( { @@ -1928,6 +1940,7 @@ def test_constructor_categorical(self): ) tm.assert_frame_equal(df, expected) + def test_from_nested_listlike_mixed_types(self): # mixed df = DataFrame([Categorical(list("abc")), list("def")]) expected = DataFrame( @@ -1935,11 +1948,14 @@ def test_constructor_categorical(self): ) tm.assert_frame_equal(df, expected) + def test_construct_from_listlikes_mismatched_lengths(self): # invalid (shape) msg = r"Shape of passed values is \(6, 2\), indices imply \(3, 2\)" with pytest.raises(ValueError, match=msg): DataFrame([Categorical(list("abc")), Categorical(list("abdefg"))]) + def test_categorical_1d_only(self): + # TODO: belongs in Categorical tests # ndim > 1 msg = "> 1 ndim Categorical are not supported at this time" with pytest.raises(NotImplementedError, match=msg): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index ca7a171947ca0..c7bd38bbd00b9 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -326,13 +326,16 @@ def test_constructor_categorical(self): expected = Series([1, 2, 3], dtype="int64") tm.assert_series_equal(result, expected) + def test_construct_from_categorical_with_dtype(self): # GH12574 cat = Series(Categorical([1, 2, 3]), dtype="category") assert is_categorical_dtype(cat) assert is_categorical_dtype(cat.dtype) - s = Series([1, 2, 3], dtype="category") - assert is_categorical_dtype(s) - assert is_categorical_dtype(s.dtype) + + def test_construct_intlist_values_category_dtype(self): + ser = Series([1, 2, 3], dtype="category") + assert is_categorical_dtype(ser) + assert is_categorical_dtype(ser.dtype) def test_constructor_categorical_with_coercion(self): factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"])