diff --git a/pandas/core/construction.py b/pandas/core/construction.py index e83aa02f25ada..e18ba805be052 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -467,6 +467,8 @@ def sanitize_array( dtype: DtypeObj | None = None, copy: bool = False, raise_cast_failure: bool = True, + *, + allow_2d: bool = False, ) -> ArrayLike: """ Sanitize input data to an ndarray or ExtensionArray, copy if specified, @@ -479,6 +481,8 @@ def sanitize_array( dtype : np.dtype, ExtensionDtype, or None, default None copy : bool, default False raise_cast_failure : bool, default True + allow_2d : bool, default False + If False, raise if we have a 2D Arraylike. Returns ------- @@ -552,7 +556,7 @@ def sanitize_array( # "ExtensionArray") subarr = maybe_cast_to_datetime(subarr, dtype) # type: ignore[assignment] - subarr = _sanitize_ndim(subarr, data, dtype, index) + subarr = _sanitize_ndim(subarr, data, dtype, index, allow_2d=allow_2d) if not ( isinstance(subarr.dtype, ExtensionDtype) or isinstance(dtype, ExtensionDtype) @@ -570,7 +574,12 @@ def sanitize_array( def _sanitize_ndim( - result: ArrayLike, data, dtype: DtypeObj | None, index: Index | None + result: ArrayLike, + data, + dtype: DtypeObj | None, + index: Index | None, + *, + allow_2d: bool = False, ) -> ArrayLike: """ Ensure we have a 1-dimensional result array. @@ -584,6 +593,8 @@ def _sanitize_ndim( elif result.ndim > 1: if isinstance(data, np.ndarray): + if allow_2d: + return result raise ValueError("Data must be 1-dimensional") if is_object_dtype(dtype) and isinstance(dtype, ExtensionDtype): # i.e. 
PandasDtype("O") diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 18ee1ad9bcd96..99da79c2d643c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -94,7 +94,6 @@ infer_dtype_from_scalar, invalidate_string_dtypes, maybe_box_native, - maybe_convert_platform, maybe_downcast_to_dtype, validate_numeric_casting, ) @@ -4498,35 +4497,11 @@ def _sanitize_column(self, value) -> ArrayLike: # We should never get here with DataFrame value if isinstance(value, Series): - value = _reindex_for_setitem(value, self.index) + return _reindex_for_setitem(value, self.index) - elif isinstance(value, ExtensionArray): - # Explicitly copy here - value = value.copy() + if is_list_like(value): com.require_length_match(value, self.index) - - elif is_sequence(value): - com.require_length_match(value, self.index) - - # turn me into an ndarray - if not isinstance(value, (np.ndarray, Index)): - if isinstance(value, list) and len(value) > 0: - value = maybe_convert_platform(value) - else: - value = com.asarray_tuplesafe(value) - elif isinstance(value, Index): - value = value.copy(deep=True)._values - else: - value = value.copy() - - # possibly infer to datetimelike - if is_object_dtype(value.dtype): - value = sanitize_array(value, None) - - else: - value = construct_1d_arraylike_from_scalar(value, len(self), dtype=None) - - return value + return sanitize_array(value, self.index, copy=True, allow_2d=True) @property def _series(self): diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 35e5abe9ce4e7..a680ae5cd695c 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -55,6 +55,25 @@ def _can_hold_element_patched(obj, element) -> bool: return can_hold_element(obj, element) +orig_assert_attr_equal = tm.assert_attr_equal + + +def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"): + """ + patch tm.assert_attr_equal so PandasDtype("object") is close enough to + 
np.dtype("object") + """ + if attr == "dtype": + lattr = getattr(left, "dtype", None) + rattr = getattr(right, "dtype", None) + if isinstance(lattr, PandasDtype) and not isinstance(rattr, PandasDtype): + left = left.astype(lattr.numpy_dtype) + elif isinstance(rattr, PandasDtype) and not isinstance(lattr, PandasDtype): + right = right.astype(rattr.numpy_dtype) + + orig_assert_attr_equal(attr, left, right, obj) + + @pytest.fixture(params=["float", "object"]) def dtype(request): return PandasDtype(np.dtype(request.param)) @@ -81,6 +100,7 @@ def allow_in_pandas(monkeypatch): m.setattr(PandasArray, "_typ", "extension") m.setattr(managers, "_extract_array", _extract_array_patched) m.setattr(blocks, "can_hold_element", _can_hold_element_patched) + m.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal) yield diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index dd26a978fe81d..693e67652c912 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -386,58 +386,51 @@ def test_partial_set_empty_frame(self): with pytest.raises(ValueError, match=msg): df.loc[:, 1] = 1 + def test_partial_set_empty_frame2(self): # these work as they don't really change # anything but the index # GH5632 expected = DataFrame(columns=["foo"], index=Index([], dtype="object")) - def f(): - df = DataFrame(index=Index([], dtype="object")) - df["foo"] = Series([], dtype="object") - return df + df = DataFrame(index=Index([], dtype="object")) + df["foo"] = Series([], dtype="object") - tm.assert_frame_equal(f(), expected) + tm.assert_frame_equal(df, expected) - def f(): - df = DataFrame() - df["foo"] = Series(df.index) - return df + df = DataFrame() + df["foo"] = Series(df.index) - tm.assert_frame_equal(f(), expected) + tm.assert_frame_equal(df, expected) - def f(): - df = DataFrame() - df["foo"] = df.index - return df + df = DataFrame() + df["foo"] = df.index - tm.assert_frame_equal(f(), expected) + 
tm.assert_frame_equal(df, expected) + def test_partial_set_empty_frame3(self): expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) expected["foo"] = expected["foo"].astype("float64") - def f(): - df = DataFrame(index=Index([], dtype="int64")) - df["foo"] = [] - return df + df = DataFrame(index=Index([], dtype="int64")) + df["foo"] = [] - tm.assert_frame_equal(f(), expected) + tm.assert_frame_equal(df, expected) - def f(): - df = DataFrame(index=Index([], dtype="int64")) - df["foo"] = Series(np.arange(len(df)), dtype="float64") - return df + df = DataFrame(index=Index([], dtype="int64")) + df["foo"] = Series(np.arange(len(df)), dtype="float64") - tm.assert_frame_equal(f(), expected) + tm.assert_frame_equal(df, expected) - def f(): - df = DataFrame(index=Index([], dtype="int64")) - df["foo"] = range(len(df)) - return df + def test_partial_set_empty_frame4(self): + df = DataFrame(index=Index([], dtype="int64")) + df["foo"] = range(len(df)) expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) - expected["foo"] = expected["foo"].astype("float64") - tm.assert_frame_equal(f(), expected) + # range is int-dtype-like, so we get int64 dtype + expected["foo"] = expected["foo"].astype("int64") + tm.assert_frame_equal(df, expected) + def test_partial_set_empty_frame5(self): df = DataFrame() tm.assert_index_equal(df.columns, Index([], dtype=object)) df2 = DataFrame() @@ -446,6 +439,7 @@ def f(): tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1])) tm.assert_frame_equal(df, df2) + def test_partial_set_empty_frame_no_index(self): # no index to start expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0])