Skip to content

Commit 6c17f67

Browse files
stephenrauch authored and jreback committed
BUG: GH15426 timezone lost in groupby-agg with cython functions
Closes #15426. Author: Stephen Rauch <[email protected]>. Closes #15433 from stephenrauch/tz-lost-in-groupby-agg and squashes the following commits: 64a84ca [Stephen Rauch] BUG: GH15426 timezone lost in groupby-agg with cython functions
1 parent fb7dc7d commit 6c17f67

File tree

4 files changed

+44
-3
lines changed

4 files changed

+44
-3
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -622,6 +622,7 @@ Bug Fixes
622622

623623
- Bug in ``DataFrame.to_stata()`` and ``StataWriter`` which caused incorrectly formatted files to be produced for some locales (:issue:`13856`)
624624
- Bug in ``pd.concat()`` in which concatenating with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`)
625+
- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`)
625626

626627

627628

pandas/tests/groupby/test_aggregate.py

+30-1
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
"""
77

88
from __future__ import print_function
9-
from datetime import datetime
9+
from datetime import datetime, timedelta
1010
from functools import partial
1111

1212
import numpy as np
@@ -738,3 +738,32 @@ def test_agg_over_numpy_arrays(self):
738738
columns=expected_column)
739739

740740
assert_frame_equal(result, expected)
741+
742+
def test_agg_timezone_round_trip(self):
    """Check that groupby results on tz-aware timestamps equal the inputs.

    Exercises ``agg`` with a NumPy function and with a lambda, ``min``,
    and the selectors ``nth``/``head``/``first``/``last``/``apply``.
    """
    # GH 15426
    tz_name = 'US/Pacific'
    earliest = pd.Timestamp("2016-01-01 12:00:00", tz=tz_name)
    stamps = [earliest + timedelta(minutes=offset) for offset in range(10)]
    frame = pd.DataFrame({'a': 1, 'b': stamps})

    # Compute all three reductions first, then compare each with the
    # smallest timestamp that went in.
    by_np_func = frame.groupby('a')['b'].agg(np.min).iloc[0]
    by_lambda = frame.groupby('a')['b'].agg(lambda x: np.min(x)).iloc[0]
    by_method = frame.groupby('a')['b'].min().iloc[0]

    assert by_np_func == earliest
    assert by_lambda == earliest
    assert by_method == earliest

    # Four consecutive days, alternating between groups 'a' and 'b', so
    # group 'a' owns rows 0 and 2 of the frame.
    days = []
    for day in range(1, 5):
        days.append(pd.Timestamp("2016-01-0%d 12:00:00" % day, tz=tz_name))
    two_groups = pd.DataFrame({'A': ['a', 'b'] * 2, 'B': days})
    by_label = two_groups.groupby('A')

    # First row of group 'a'.
    first_of_a = two_groups['B'].iloc[0]
    assert first_of_a == by_label.nth(0)['B'].iloc[0]
    assert first_of_a == by_label.head(1)['B'].iloc[0]
    assert first_of_a == by_label.first()['B'].iloc[0]
    assert first_of_a == by_label.apply(lambda x: x.iloc[0])[0]

    # Last row of group 'a'.
    last_of_a = two_groups['B'].iloc[2]
    assert last_of_a == by_label.last()['B'].iloc[0]
    assert last_of_a == by_label.apply(lambda x: x.iloc[-1])[0]

pandas/tests/types/test_cast.py

+11-1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
from datetime import datetime
99
import numpy as np
1010

11-
from pandas import Timedelta, Timestamp
11+
from pandas import Timedelta, Timestamp, DatetimeIndex
1212
from pandas.types.cast import (_possibly_downcast_to_dtype,
1313
_possibly_convert_objects,
1414
_infer_dtype_from_scalar,
@@ -71,6 +71,16 @@ def test_datetimelikes_nan(self):
7171
res = _possibly_downcast_to_dtype(arr, 'timedelta64[ns]')
7272
tm.assert_numpy_array_equal(res, exp)
7373

74+
def test_datetime_with_timezone(self):
    """Check downcasting to a tz-aware dtype returns the expected index.

    The same expected ``DatetimeIndex`` must come back whether the input
    is the index itself or its ``asi8`` values.
    """
    # GH 15426
    stamp = Timestamp("2016-01-01 12:00:00", tz='US/Pacific')
    expected = DatetimeIndex([stamp, stamp])

    # Input is already the tz-aware index.
    from_index = _possibly_downcast_to_dtype(expected, expected.dtype)
    tm.assert_index_equal(from_index, expected)

    # Input is the ``asi8`` values of that index.
    from_i8 = _possibly_downcast_to_dtype(expected.asi8, expected.dtype)
    tm.assert_index_equal(from_i8, expected)
83+
7484

7585
class TestInferDtype(tm.TestCase):
7686

pandas/types/cast.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -133,7 +133,8 @@ def trans(x): # noqa
133133
if dtype.tz:
134134
# convert to datetime and change timezone
135135
from pandas import to_datetime
136-
result = to_datetime(result).tz_localize(dtype.tz)
136+
result = to_datetime(result).tz_localize('utc')
137+
result = result.tz_convert(dtype.tz)
137138

138139
except:
139140
pass

0 commit comments

Comments
 (0)