diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 123fc346441cb..47cf789679617 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -622,6 +622,7 @@ Bug Fixes - Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which produces incorrectly formatted files to be produced for some locales (:issue:`13856`) - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) +- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`) diff --git a/pandas/tests/groupby/test_aggregate.py b/pandas/tests/groupby/test_aggregate.py index a1fc97eb8d780..dd4a52e5103b5 100644 --- a/pandas/tests/groupby/test_aggregate.py +++ b/pandas/tests/groupby/test_aggregate.py @@ -6,7 +6,7 @@ """ from __future__ import print_function -from datetime import datetime +from datetime import datetime, timedelta from functools import partial import numpy as np @@ -738,3 +738,32 @@ def test_agg_over_numpy_arrays(self): columns=expected_column) assert_frame_equal(result, expected) + + def test_agg_time_zone_round_trip(self): + # GH 15426 + ts = pd.Timestamp("2016-01-01 12:00:00", tz='US/Pacific') + df = pd.DataFrame({'a': 1, 'b': [ts + timedelta(minutes=nn) + for nn in range(10)]}) + + result1 = df.groupby('a')['b'].agg(np.min).iloc[0] + result2 = df.groupby('a')['b'].agg(lambda x: np.min(x)).iloc[0] + result3 = df.groupby('a')['b'].min().iloc[0] + + self.assertEqual(result1, ts) + self.assertEqual(result2, ts) + self.assertEqual(result3, ts) + + dates = [pd.Timestamp("2016-01-0%d 12:00:00" % i, tz='US/Pacific') + for i in range(1, 5)] + df = pd.DataFrame({'A': ['a', 'b'] * 2, 'B': dates}) + grouped = df.groupby('A') + + ts = df['B'].iloc[0] + self.assertEqual(ts, grouped.nth(0)['B'].iloc[0]) + self.assertEqual(ts, grouped.head(1)['B'].iloc[0]) + self.assertEqual(ts, grouped.first()['B'].iloc[0]) + self.assertEqual(ts, grouped.apply(lambda x: x.iloc[0])[0]) + + ts = df['B'].iloc[2] + self.assertEqual(ts, grouped.last()['B'].iloc[0]) + self.assertEqual(ts, grouped.apply(lambda x: x.iloc[-1])[0]) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index 497130b117289..ae96196ce6a00 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -8,7 +8,7 @@ from datetime import datetime import numpy as np -from pandas import Timedelta, Timestamp +from pandas import Timedelta, Timestamp, DatetimeIndex from pandas.types.cast import (_possibly_downcast_to_dtype, _possibly_convert_objects, _infer_dtype_from_scalar, @@ -71,6 +71,13 @@ def test_datetimelikes_nan(self): res = _possibly_downcast_to_dtype(arr, 'timedelta64[ns]') tm.assert_numpy_array_equal(res, exp) + def test_datetime_downcast(self): + # GH 15426 + ts = Timestamp("2016-01-01 12:00:00", tz='US/Pacific') + exp = DatetimeIndex([ts, ts]) + res = _possibly_downcast_to_dtype(exp.asi8, exp.dtype) + tm.assert_index_equal(res, exp) + class TestInferDtype(tm.TestCase): diff --git a/pandas/types/cast.py b/pandas/types/cast.py index b1a17df64aecf..8cc3fe41f73c8 100644 --- a/pandas/types/cast.py +++ b/pandas/types/cast.py @@ -133,7 +133,8 @@ def trans(x): # noqa if dtype.tz: # convert to datetime and change timezone from pandas import to_datetime - result = to_datetime(result).tz_localize(dtype.tz) + result = to_datetime(result).tz_localize('utc') + result = result.tz_convert(dtype.tz) except: pass