From dc97ba934ee6d98a738219fdee56fd78ab094dd7 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 14 Dec 2019 18:24:08 +0000 Subject: [PATCH 1/5] :bug: don't remove timezone-awareness when using the method from DataFrame --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/frame.py | 18 ++++++------------ pandas/tests/frame/test_combine_concat.py | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 1253788d7ff27..faca744a8f92c 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -714,6 +714,7 @@ Datetimelike - Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`) - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`) - Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`) +- Bug in :meth:`DataFrame.append` would remove the timezone-awareness of new data (:issue:`30238`) Timedelta ^^^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 51330bfc55dc3..d9fa52c105913 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6757,25 +6757,19 @@ def append(self, other, ignore_index=False, verify_integrity=False, sort=False): " or if the Series has a name" ) - if other.name is None: - index = None - else: - # other must have the same index name as self, otherwise - # index name will be reset - index = Index([other.name], name=self.index.name) + index = Index([other.name], name=self.index.name) idx_diff = other.index.difference(self.columns) try: combined_columns = self.columns.append(idx_diff) except TypeError: combined_columns = self.columns.astype(object).append(idx_diff) - other = other.reindex(combined_columns, copy=False) - other = DataFrame( - other.values.reshape((1, len(other))), - index=index, - columns=combined_columns, + other = ( + other.reindex(combined_columns, copy=False) + .to_frame() + .T.rename_axis(index.names) + ._convert(datetime=True, timedelta=True) ) - other = other._convert(datetime=True, timedelta=True) if not self.columns.equals(combined_columns): self = self.reindex(columns=combined_columns) elif isinstance(other, list): diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index e72de487abb2f..b4567a4603184 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -2,6 +2,7 @@ import numpy as np import pytest +import pytz import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, date_range @@ -288,6 +289,20 @@ def test_append_dtypes(self): expected = DataFrame({"bar": Series([Timestamp("20130101"), 1])}) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( + "timestamp, timezone", + [ + ("2019-07-19 07:04:57+0100", pytz.FixedOffset(60)), + ("2019-07-19 07:04:57", None), + ], + ) + def test_append_timestamps_aware_or_naive(self, timestamp, timezone): + # GH 30238 + df = pd.DataFrame([pd.Timestamp(timestamp, tz=timezone)]) + result = df.append(df.iloc[0]).iloc[-1] + expected = pd.Series(pd.Timestamp(timestamp, tz=timezone), name=0) + pd.testing.assert_series_equal(result, expected) + def test_update(self): df = DataFrame( [[1.5, np.nan, 3.0], [1.5, np.nan, 3.0], [1.5, np.nan, 3], [1.5, np.nan, 3]] From 5046588178800c1c59b80bcc2d2c70e3c8020e01 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 16 Dec 2019 10:20:28 +0000 Subject: [PATCH 2/5] :ok_hand: simplify , use fixture in test --- pandas/core/frame.py | 5 ++--- pandas/tests/frame/test_combine_concat.py | 14 +++++--------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d9fa52c105913..cf3b8c4703a0d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6758,7 +6758,6 @@ def append(self, other, ignore_index=False, verify_integrity=False, sort=False): ) index = Index([other.name], name=self.index.name) - idx_diff = other.index.difference(self.columns) try: combined_columns = self.columns.append(idx_diff) @@ -6767,8 +6766,8 @@ def append(self, other, ignore_index=False, verify_integrity=False, sort=False): other = ( other.reindex(combined_columns, copy=False) .to_frame() - .T.rename_axis(index.names) - ._convert(datetime=True, timedelta=True) + .T.infer_objects() + .rename_axis(index.names) ) if not self.columns.equals(combined_columns): self = self.reindex(columns=combined_columns) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index b4567a4603184..b5339962b72cc 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -2,7 +2,6 @@ import numpy as np import pytest -import pytz import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, date_range @@ -290,17 +289,14 @@ def test_append_dtypes(self): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( - "timestamp, timezone", - [ - ("2019-07-19 07:04:57+0100", pytz.FixedOffset(60)), - ("2019-07-19 07:04:57", None), - ], + "timestamp", ["2019-07-19 07:04:57+0100", "2019-07-19 07:04:57"] ) - def test_append_timestamps_aware_or_naive(self, timestamp, timezone): + def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): # GH 30238 - df = pd.DataFrame([pd.Timestamp(timestamp, tz=timezone)]) + tz = tz_naive_fixture + df = pd.DataFrame([pd.Timestamp(timestamp, tz=tz)]) result = df.append(df.iloc[0]).iloc[-1] - expected = pd.Series(pd.Timestamp(timestamp, tz=timezone), name=0) + expected = pd.Series(pd.Timestamp(timestamp, tz=tz), name=0) pd.testing.assert_series_equal(result, expected) def test_update(self): From f608b2280aa1e12f40a4686961979e624a081ab1 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Tue, 24 Dec 2019 10:32:27 +0000 Subject: [PATCH 3/5] :ok_hand: add copy=False to .rename_axis, use tm instead of pd.testing --- pandas/core/frame.py | 2 +- pandas/tests/frame/test_combine_concat.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index cf3b8c4703a0d..dfda1470413b7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6767,7 +6767,7 @@ def append(self, other, ignore_index=False, verify_integrity=False, sort=False): other.reindex(combined_columns, copy=False) .to_frame() .T.infer_objects() - .rename_axis(index.names) + .rename_axis(index.names, copy=False) ) if not self.columns.equals(combined_columns): self = self.reindex(columns=combined_columns) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index b5339962b72cc..ebc4438366001 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -297,7 +297,7 @@ def test_append_timestamps_aware_or_naive(self, tz_naive_fixture, timestamp): df = pd.DataFrame([pd.Timestamp(timestamp, tz=tz)]) result = df.append(df.iloc[0]).iloc[-1] expected = pd.Series(pd.Timestamp(timestamp, tz=tz), name=0) - pd.testing.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) def test_update(self): df = DataFrame( From 578671ae0030339129beae2f99d734313a2b78b4 Mon Sep 17 00:00:00 2001 From: MarcoGorelli Date: Sat, 14 Dec 2019 18:24:08 +0000 Subject: [PATCH 4/5] :bug: don't remove timezone-awareness when using the method from DataFrame --- pandas/tests/frame/test_combine_concat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index ebc4438366001..f06ec92fad125 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -2,6 +2,7 @@ import numpy as np import pytest +import pytz import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, date_range From e5a08c79bffd1e4da1772655354ed8452f785d54 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Mon, 16 Dec 2019 10:20:28 +0000 Subject: [PATCH 5/5] :ok_hand: simplify , use fixture in test --- pandas/tests/frame/test_combine_concat.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py index f06ec92fad125..ebc4438366001 100644 --- a/pandas/tests/frame/test_combine_concat.py +++ b/pandas/tests/frame/test_combine_concat.py @@ -2,7 +2,6 @@ import numpy as np import pytest -import pytz import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, date_range