diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index de3a05a2ccdfb..c38712b353b8d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -922,6 +922,7 @@ Datetimelike resolution which converted to object dtype instead of coercing to ``datetime64[ns]`` dtype when within the timestamp bounds (:issue:`34843`). - The ``freq`` keyword in :class:`Period`, :func:`date_range`, :func:`period_range`, :func:`pd.tseries.frequencies.to_offset` no longer allows tuples, pass as string instead (:issue:`34703`) +- Bug in :meth:`DataFrame.append` where appending a :class:`Series` containing a scalar tz-aware :class:`Timestamp` to an empty :class:`DataFrame` resulted in an object column instead of ``datetime64[ns, tz]`` dtype (:issue:`35038`) - ``OutOfBoundsDatetime`` issues an improved error message when timestamp is out of implementation bounds. (:issue:`32967`) Timedelta diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 4b7c818f487ac..9902016475b22 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -152,11 +152,11 @@ def is_nonempty(x) -> bool: target_dtype = find_common_type([x.dtype for x in to_concat]) to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat] - if isinstance(to_concat[0], ExtensionArray): + if isinstance(to_concat[0], ExtensionArray) and axis == 0: cls = type(to_concat[0]) return cls._concat_same_type(to_concat) else: - return np.concatenate(to_concat) + return np.concatenate(to_concat, axis=axis) elif _contains_datetime or "timedelta" in typs: return concat_datetime(to_concat, axis=axis, typs=typs) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 2cc7461986c8f..2c0d4931a7bf2 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -333,7 +333,7 @@ def _concatenate_join_units(join_units, concat_axis, copy): # concatting with at least one EA means we are concatting a 
single column # the non-EA values are 2D arrays with shape (1, n) to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat] - concat_values = concat_compat(to_concat, axis=concat_axis) + concat_values = concat_compat(to_concat, axis=0) if not isinstance(concat_values, ExtensionArray): # if the result of concat is not an EA but an ndarray, reshape to # 2D to put it a non-EA Block diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index ffeb5ff0f8aaa..0159fabd04d59 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1087,20 +1087,27 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self): date = Timestamp("2018-10-24 07:30:00", tz=dateutil.tz.tzutc()) s = Series({"date": date, "a": 1.0, "b": 2.0}) df = DataFrame(columns=["c", "d"]) - result = df.append(s, ignore_index=True) - # n.b. it's not clear to me that expected is correct here. - # It's possible that the `date` column should have - # datetime64[ns, tz] dtype for both result and expected. - # that would be more consistent with new columns having - # their own dtype (float for a and b, datetime64ns, tz for date). 
+ result_a = df.append(s, ignore_index=True) expected = DataFrame( - [[np.nan, np.nan, 1.0, 2.0, date]], - columns=["c", "d", "a", "b", "date"], - dtype=object, + [[np.nan, np.nan, 1.0, 2.0, date]], columns=["c", "d", "a", "b", "date"] ) # These columns get cast to object after append - expected["a"] = expected["a"].astype(float) - expected["b"] = expected["b"].astype(float) + expected["c"] = expected["c"].astype(object) + expected["d"] = expected["d"].astype(object) + tm.assert_frame_equal(result_a, expected) + + expected = DataFrame( + [[np.nan, np.nan, 1.0, 2.0, date]] * 2, columns=["c", "d", "a", "b", "date"] + ) + expected["c"] = expected["c"].astype(object) + expected["d"] = expected["d"].astype(object) + + result_b = result_a.append(s, ignore_index=True) + tm.assert_frame_equal(result_b, expected) + + # column order is different + expected = expected[["c", "d", "date", "a", "b"]] + result = df.append([s, s], ignore_index=True) tm.assert_frame_equal(result, expected)