From 850aaf9da0a5679847dac2c93d16ab70bf2b7533 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 13 Nov 2018 21:48:23 -0800 Subject: [PATCH 1/3] BUG: Append DataFrame to Series with dateutil timezone --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/_libs/lib.pyx | 19 +++++++++---------- pandas/tests/reshape/test_concat.py | 15 +++++++++++++++ 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index d836ef3441e89..0b5ffd0345607 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -1364,6 +1364,7 @@ Reshaping - Bug in :func:`pandas.concat` when concatenating a multicolumn DataFrame with tz-aware data against a DataFrame with a different number of columns (:issue`22796`) - Bug in :func:`merge_asof` where confusing error message raised when attempting to merge with missing values (:issue:`23189`) - Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have :class:`MultiIndex`ed columns (:issue:`23033`). +- Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`) .. 
_whatsnew_0240.bug_fixes.sparse: diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index cfc60256e97a3..432d514c87dfd 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -48,8 +48,7 @@ cdef extern from "src/parse_helper.h": int floatify(object, float64_t *result, int *maybe_int) except -1 cimport util -from util cimport (is_nan, - UINT8_MAX, UINT64_MAX, INT64_MAX, INT64_MIN) +from util cimport is_nan, UINT64_MAX, INT64_MAX, INT64_MIN from tslib import array_to_datetime from tslibs.nattype cimport NPY_NAT @@ -1647,15 +1646,15 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool: base_val = values[i] if base_val is not NaT: base_tz = get_timezone(getattr(base_val, 'tzinfo', None)) - - for j in range(i, n): - val = values[j] - if val is not NaT: - tz = getattr(val, 'tzinfo', None) - if not tz_compare(base_tz, tz): - return False break + for j in range(i, n): + val = values[j] + if val is not NaT: + tz = getattr(val, 'tzinfo', None) + if not tz_compare(base_tz, tz): + return False + return True @@ -2045,7 +2044,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, # we try to coerce datetime w/tz but must all have the same tz if seen.datetimetz_: - if len({getattr(val, 'tzinfo', None) for val in objects}) == 1: + if is_datetime_with_singletz_array(objects): from pandas import DatetimeIndex return DatetimeIndex(objects) seen.object_ = 1 diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index c7fba47a8f27c..ebfe49462e05c 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1010,6 +1010,21 @@ def test_append_missing_column_proper_upcast(self, sort): assert appended['A'].dtype == 'f8' assert appended['B'].dtype == 'O' + def test_append_empty_frame_to_series_with_dateutil_tz(self): + # GH 23682 + date = Timestamp('2018-10-24 07:30:00', tz=dateutil.tz.UTC) + s = Series({'date': date, 'a': 1.0, 'b': 2.0}) + df = DataFrame(columns=['c', 'd']) + 
result = df.append(s, ignore_index=True) + expected = DataFrame([[np.nan, np.nan, 1., 2., date]], + columns=['c', 'd', 'a', 'b', 'date']) + # These columns get cast to object after append + object_cols = ['c', 'd', 'date'] + expected.loc[:, object_cols] = expected.loc[:, object_cols].astype( + object + ) + assert_frame_equal(result, expected) + class TestConcatenate(ConcatenateBase): From 36ddb6892a6fa06865cb728e6beec2591a4b3b2b Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 14 Nov 2018 10:42:18 -0800 Subject: [PATCH 2/3] Comments and use more compatible dateutil.tz.tzutc --- pandas/_libs/lib.pyx | 4 +++- pandas/tests/reshape/test_concat.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 432d514c87dfd..596f3922a14dd 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1641,13 +1641,15 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool: if n == 0: return False - + # Get a reference timezone to compare with the rest for i in range(n): base_val = values[i] if base_val is not NaT: base_tz = get_timezone(getattr(base_val, 'tzinfo', None)) break + # Compare the reference timezone with the rest of the timezones + # in the array for j in range(i, n): val = values[j] if val is not NaT: diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index ebfe49462e05c..07b00cef2669e 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -1012,7 +1012,7 @@ def test_append_missing_column_proper_upcast(self, sort): def test_append_empty_frame_to_series_with_dateutil_tz(self): # GH 23682 - date = Timestamp('2018-10-24 07:30:00', tz=dateutil.tz.UTC) + date = Timestamp('2018-10-24 07:30:00', tz=dateutil.tz.tzutc()) s = Series({'date': date, 'a': 1.0, 'b': 2.0}) df = DataFrame(columns=['c', 'd']) result = df.append(s, ignore_index=True) From 0479c3069bae6bfc60811c376b5b59522d88f0a6 Mon Sep 17 00:00:00 2001 From: Matt
Roeschke Date: Wed, 14 Nov 2018 10:46:03 -0800 Subject: [PATCH 3/3] add better comments --- pandas/_libs/lib.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 596f3922a14dd..0088a698f49e0 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1641,16 +1641,16 @@ def is_datetime_with_singletz_array(values: ndarray) -> bool: if n == 0: return False - # Get a reference timezone to compare with the rest + # Get a reference timezone to compare with the rest of the tzs in the array for i in range(n): base_val = values[i] if base_val is not NaT: base_tz = get_timezone(getattr(base_val, 'tzinfo', None)) break - # Compare the reference timezone with the rest of the timezones - # in the array for j in range(i, n): + # Compare val's timezone with the reference timezone + # NaT can coexist with tz-aware datetimes, so skip if encountered val = values[j] if val is not NaT: tz = getattr(val, 'tzinfo', None)