Skip to content

Commit 4d7e1cf

Browse files
committed
BUG: first/last lose timezone in groupby
1 parent 028c9c0 commit 4d7e1cf

File tree

3 files changed

+48
-3
lines changed

3 files changed

+48
-3
lines changed

doc/source/whatsnew/v0.24.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ Plotting
225225
Groupby/Resample/Rolling
226226
^^^^^^^^^^^^^^^^^^^^^^^^
227227

228-
-
228+
- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` with ``as_index=False`` leading to the loss of timezone information (:issue:`15884`)
229229
-
230230
-
231231

pandas/core/groupby/groupby.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -4740,7 +4740,10 @@ def _wrap_transformed_output(self, output, names=None):
47404740

47414741
def _wrap_agged_blocks(self, items, blocks):
47424742
if not self.as_index:
4743-
index = np.arange(blocks[0].values.shape[1])
4743+
if blocks[0].values.ndim > 1:
4744+
index = np.arange(blocks[0].values.shape[1])
4745+
else:
4746+
index = np.arange(blocks[0].values.shape[0])
47444747
mgr = BlockManager(blocks, [items, index])
47454748
result = DataFrame(mgr)
47464749

pandas/tests/groupby/test_nth.py

+43-1
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
import numpy as np
22
import pandas as pd
3-
from pandas import DataFrame, MultiIndex, Index, Series, isna
3+
from pandas import DataFrame, MultiIndex, Index, Series, isna, Timestamp
44
from pandas.compat import lrange
55
from pandas.util.testing import (
66
assert_frame_equal,
77
assert_produces_warning,
88
assert_series_equal)
9+
import pytest
910

1011

1112
def test_first_last_nth(df):
@@ -219,6 +220,47 @@ def test_nth_multi_index(three_group):
219220
assert_frame_equal(result, expected)
220221

221222

223+
@pytest.mark.parametrize('data, expected_first, expected_last', [
224+
({'id': ['A'], 'time': Timestamp('2012-02-01 14:00:00',
225+
tz='US/Central')},
226+
{'id': ['A'], 'time': Timestamp('2012-02-01 14:00:00',
227+
tz='US/Central')},
228+
{'id': ['A'], 'time': Timestamp('2012-02-01 14:00:00',
229+
tz='US/Central')}),
230+
({'id': ['A', 'B', 'A'],
231+
'time': [Timestamp('2012-01-01 13:00:00',
232+
tz='America/New_York'),
233+
Timestamp('2012-02-01 14:00:00',
234+
tz='US/Central'),
235+
Timestamp('2012-03-01 12:00:00',
236+
tz='Europe/London')]},
237+
{'id': ['A', 'B'],
238+
'time': [Timestamp('2012-01-01 13:00:00',
239+
tz='America/New_York'),
240+
Timestamp('2012-02-01 14:00:00',
241+
tz='US/Central')]},
242+
{'id': ['A', 'B'],
243+
'time': [Timestamp('2012-03-01 12:00:00',
244+
tz='Europe/London'),
245+
Timestamp('2012-02-01 14:00:00',
246+
tz='US/Central')]})
247+
])
248+
def test_first_last_tz(data, expected_first, expected_last):
249+
# GH15884
250+
# Test that the timezone is retained when calling first
251+
# or last on groupby with as_index=False
252+
253+
df = DataFrame(data)
254+
255+
result = df.groupby('id', as_index=False).first()
256+
expected = DataFrame(expected_first)
257+
assert_frame_equal(result, expected)
258+
259+
result = df.groupby('id', as_index=False).last()
260+
expected = DataFrame(expected_last)
261+
assert_frame_equal(result, expected)
262+
263+
222264
def test_nth_multi_index_as_expected():
223265
# PR 9090, related to issue 8979
224266
# test nth on MultiIndex

0 commit comments

Comments
 (0)