From 6acc0e23023968ccf525669803a2eb73cecb8b97 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Sep 2020 15:59:16 +0200 Subject: [PATCH 1/5] REGR: append tz-aware DataFrame with tz-naive values --- doc/source/whatsnew/v1.1.2.rst | 1 + pandas/core/dtypes/concat.py | 4 ++-- pandas/core/internals/concat.py | 6 +++++- pandas/tests/reshape/test_concat.py | 17 +++++++++++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index ac9fe9d2fca26..6884069081ea9 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -16,6 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`) - Fix regression in updating a column inplace (e.g. using ``df['col'].fillna(.., inplace=True)``) (:issue:`35731`) +- Fix regression in :meth:`DataFrame.append` mixing tz-aware and tz-naive datetime columns (:issue:`35460`) - Performance regression for :meth:`RangeIndex.format` (:issue:`35712`) - Regression in :meth:`DataFrame.replace` where a ``TypeError`` would be raised when attempting to replace elements of type :class:`Interval` (:issue:`35931`) - diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 9902016475b22..259b3aa6a0cfe 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -152,11 +152,11 @@ def is_nonempty(x) -> bool: target_dtype = find_common_type([x.dtype for x in to_concat]) to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat] - if isinstance(to_concat[0], ExtensionArray) and axis == 0: + if isinstance(to_concat[0], ExtensionArray): # and axis == 0: cls = type(to_concat[0]) return cls._concat_same_type(to_concat) else: - return np.concatenate(to_concat, axis=axis) + return np.concatenate(to_concat) # , axis=axis) elif _contains_datetime or "timedelta" in typs: return concat_datetime(to_concat, axis=axis, typs=typs) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 88839d2211f81..0f0cc89de0a53 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -335,9 +335,13 @@ def _concatenate_join_units(join_units, concat_axis, copy): # the non-EA values are 2D arrays with shape (1, n) to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat] concat_values = concat_compat(to_concat, axis=0) - if not isinstance(concat_values, ExtensionArray): + if not isinstance( + concat_values, ExtensionArray + ) or concat_values.dtype == np.dtype("M8[ns]"): # if the result of concat is not an EA but an ndarray, reshape to # 2D to put it a non-EA Block + # special case DatetimeArray, which *is* an EA, but is put in a + # consolidated 2D block concat_values = np.atleast_2d(concat_values) else: concat_values = concat_compat(to_concat, axis=concat_axis) diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 38cf2cc2402a1..90705f827af25 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1110,6 +1110,23 @@ def test_append_empty_frame_to_series_with_dateutil_tz(self): result = df.append([s, s], ignore_index=True) tm.assert_frame_equal(result, expected) + def test_append_empty_tz_frame_with_datetime64ns(self): + # https://github.com/pandas-dev/pandas/issues/35460 + df = pd.DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") + + # pd.NaT gets inferred as tz-naive, so append result is tz-naive + result = df.append({"a": pd.NaT}, ignore_index=True) + expected = pd.DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]") + tm.assert_frame_equal(result, expected) + + # also test with typed value to append + df = pd.DataFrame(columns=["a"]).astype("datetime64[ns, UTC]") + result = df.append( + pd.Series({"a": pd.NaT}, dtype="datetime64[ns]"), ignore_index=True + ) + expected = pd.DataFrame({"a": [pd.NaT]}).astype("datetime64[ns]") + tm.assert_frame_equal(result, expected) + class TestConcatenate: def test_concat_copy(self): From 95d944bc9f7161f287f5474b98874eefcbe4575d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Sep 2020 16:47:05 +0200 Subject: [PATCH 2/5] clean-up, test assert --- pandas/core/dtypes/concat.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 259b3aa6a0cfe..4669e822c45ef 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -148,15 +148,18 @@ def is_nonempty(x) -> bool: any_ea = any(is_extension_array_dtype(x.dtype) for x in to_concat) if any_ea: + # we ignore axis here, as internally concatting with EAs is always + # for axis=0 + assert axis == 0 if not single_dtype: target_dtype = find_common_type([x.dtype for x in to_concat]) to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat] - if isinstance(to_concat[0], ExtensionArray): # and axis == 0: + if isinstance(to_concat[0], ExtensionArray): cls = type(to_concat[0]) return cls._concat_same_type(to_concat) else: - return np.concatenate(to_concat) # , axis=axis) + return np.concatenate(to_concat) elif _contains_datetime or "timedelta" in typs: return concat_datetime(to_concat, axis=axis, typs=typs) From 7625a10ccbb4a8b51e248b723d196ca73fd27879 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 4 Sep 2020 21:56:57 +0200 Subject: [PATCH 3/5] change check for DatetimeArray --- pandas/core/dtypes/concat.py | 2 +- pandas/core/internals/concat.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 4669e822c45ef..171741adda5aa 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -150,7 +150,7 @@ def is_nonempty(x) -> bool: if any_ea: # we ignore axis here, as internally concatting with EAs is always # for axis=0 - assert axis == 0 + # assert axis == 0 if not single_dtype: target_dtype = find_common_type([x.dtype for x in to_concat]) to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat] diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 0f0cc89de0a53..a13a0d73d434f 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -24,7 +24,7 @@ from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algos -from pandas.core.arrays import ExtensionArray +from pandas.core.arrays import DatetimeArray, ExtensionArray from pandas.core.internals.blocks import make_block from pandas.core.internals.managers import BlockManager @@ -335,9 +335,9 @@ def _concatenate_join_units(join_units, concat_axis, copy): # the non-EA values are 2D arrays with shape (1, n) to_concat = [t if isinstance(t, ExtensionArray) else t[0, :] for t in to_concat] concat_values = concat_compat(to_concat, axis=0) - if not isinstance( - concat_values, ExtensionArray - ) or concat_values.dtype == np.dtype("M8[ns]"): + if not isinstance(concat_values, ExtensionArray) or ( + isinstance(concat_values, DatetimeArray) and concat_values.tz is None + ): # if the result of concat is not an EA but an ndarray, reshape to # 2D to put it a non-EA Block # special case DatetimeArray, which *is* an EA, but is put in a From 053479136f5fe23b25f7e84146dabb799eb9bdc6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 5 Sep 2020 17:00:10 +0200 Subject: [PATCH 4/5] fix whitespace --- doc/source/whatsnew/v1.1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index d363855bd0aec..981bba1a30ff8 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -16,7 +16,7 @@ Fixed regressions ~~~~~~~~~~~~~~~~~ - Regression in :meth:`DatetimeIndex.intersection` incorrectly raising ``AssertionError`` when intersecting against a list (:issue:`35876`) - Fix regression in updating a column inplace (e.g. using ``df['col'].fillna(.., inplace=True)``) (:issue:`35731`) -- Fix regression in :meth:`DataFrame.append` mixing tz-aware and tz-naive datetime columns (:issue:`35460`) +- Fix regression in :meth:`DataFrame.append` mixing tz-aware and tz-naive datetime columns (:issue:`35460`) - Performance regression for :meth:`RangeIndex.format` (:issue:`35712`) - Regression in :meth:`DataFrame.replace` where a ``TypeError`` would be raised when attempting to replace elements of type :class:`Interval` (:issue:`35931`) - Fix regression in pickle roundtrip of the ``closed`` attribute of :class:`IntervalIndex` (:issue:`35658`) From 0d4a21cf81b8760828e90d8f4aaf9f31eaa35c22 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 6 Sep 2020 11:06:39 +0200 Subject: [PATCH 5/5] remove assert --- pandas/core/dtypes/concat.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 171741adda5aa..dd005752a4832 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -150,7 +150,6 @@ def is_nonempty(x) -> bool: if any_ea: # we ignore axis here, as internally concatting with EAs is always # for axis=0 - # assert axis == 0 if not single_dtype: target_dtype = find_common_type([x.dtype for x in to_concat]) to_concat = [_cast_to_common_type(arr, target_dtype) for arr in to_concat]