Skip to content

Commit c6347c4

Browse files
reidy-p authored and jreback committed
BUG: first/last lose timezone in groupby with as_index=False (#21573)
1 parent 7d8626d commit c6347c4

File tree

3 files changed

+62
-3
lines changed

3 files changed

+62
-3
lines changed

doc/source/whatsnew/v0.24.0.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -226,7 +226,7 @@ Plotting
226226
Groupby/Resample/Rolling
227227
^^^^^^^^^^^^^^^^^^^^^^^^
228228

229-
-
229+
- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` with ``as_index=False`` leading to the loss of timezone information (:issue:`15884`)
230230
-
231231
-
232232

pandas/core/groupby/groupby.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -4740,7 +4740,7 @@ def _wrap_transformed_output(self, output, names=None):
47404740

47414741
def _wrap_agged_blocks(self, items, blocks):
47424742
if not self.as_index:
4743-
index = np.arange(blocks[0].values.shape[1])
4743+
index = np.arange(blocks[0].values.shape[-1])
47444744
mgr = BlockManager(blocks, [items, index])
47454745
result = DataFrame(mgr)
47464746

pandas/tests/groupby/test_nth.py

+60-1
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,12 @@
11
import numpy as np
22
import pandas as pd
3-
from pandas import DataFrame, MultiIndex, Index, Series, isna
3+
from pandas import DataFrame, MultiIndex, Index, Series, isna, Timestamp
44
from pandas.compat import lrange
55
from pandas.util.testing import (
66
assert_frame_equal,
77
assert_produces_warning,
88
assert_series_equal)
9+
import pytest
910

1011

1112
def test_first_last_nth(df):
@@ -219,6 +220,64 @@ def test_nth_multi_index(three_group):
219220
assert_frame_equal(result, expected)
220221

221222

223+
@pytest.mark.parametrize('data, expected_first, expected_last', [
    # Single row, single group: first and last are the same row.
    (
        {'id': ['A'],
         'time': Timestamp('2012-02-01 14:00:00', tz='US/Central'),
         'foo': [1]},
        {'id': ['A'],
         'time': Timestamp('2012-02-01 14:00:00', tz='US/Central'),
         'foo': [1]},
        {'id': ['A'],
         'time': Timestamp('2012-02-01 14:00:00', tz='US/Central'),
         'foo': [1]},
    ),
    # Two groups, each timestamp carrying a different timezone.
    (
        {'id': ['A', 'B', 'A'],
         'time': [
             Timestamp('2012-01-01 13:00:00', tz='America/New_York'),
             Timestamp('2012-02-01 14:00:00', tz='US/Central'),
             Timestamp('2012-03-01 12:00:00', tz='Europe/London'),
         ],
         'foo': [1, 2, 3]},
        {'id': ['A', 'B'],
         'time': [
             Timestamp('2012-01-01 13:00:00', tz='America/New_York'),
             Timestamp('2012-02-01 14:00:00', tz='US/Central'),
         ],
         'foo': [1, 2]},
        {'id': ['A', 'B'],
         'time': [
             Timestamp('2012-03-01 12:00:00', tz='Europe/London'),
             Timestamp('2012-02-01 14:00:00', tz='US/Central'),
         ],
         'foo': [3, 2]},
    ),
])
def test_first_last_tz(data, expected_first, expected_last):
    """Check first/last keep the timezone with ``as_index=False``.

    Regression test for GH15884. For both ``first`` and ``last`` the
    aggregation is checked on the whole grouped frame and on the
    ``'time'`` column selected from the groupby.
    """
    frame = DataFrame(data)
    all_columns = ['id', 'time', 'foo']
    key_and_time = ['id', 'time']

    scenarios = (('first', expected_first), ('last', expected_last))
    for method, expected_data in scenarios:
        # Aggregate over every column of the grouped frame.
        whole = getattr(frame.groupby('id', as_index=False), method)()
        wanted = DataFrame(expected_data)
        assert_frame_equal(whole[all_columns], wanted[all_columns])

        # Aggregate only the 'time' column selected from the groupby.
        time_only = frame.groupby('id', as_index=False)['time']
        selected = getattr(time_only, method)()
        assert_frame_equal(selected, wanted[key_and_time])
280+
222281
def test_nth_multi_index_as_expected():
223282
# PR 9090, related to issue 8979
224283
# test nth on MultiIndex

0 commit comments

Comments
 (0)