diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 4bfae7de01b8f..3c3f6358d6579 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -225,7 +225,7 @@ Plotting
 Groupby/Resample/Rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
--
+- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` with ``as_index=False`` leading to the loss of timezone information (:issue:`15884`)
 -
 -
 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 3bc59157055ce..0bbdfbbe52ac4 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -4740,7 +4740,7 @@ def _wrap_transformed_output(self, output, names=None):
 
     def _wrap_agged_blocks(self, items, blocks):
         if not self.as_index:
-            index = np.arange(blocks[0].values.shape[1])
+            index = np.arange(blocks[0].values.shape[-1])
             mgr = BlockManager(blocks, [items, index])
             result = DataFrame(mgr)
 
diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py
index a32ba9ad76f14..a1b748cd50e8f 100644
--- a/pandas/tests/groupby/test_nth.py
+++ b/pandas/tests/groupby/test_nth.py
@@ -1,11 +1,12 @@
 import numpy as np
 import pandas as pd
-from pandas import DataFrame, MultiIndex, Index, Series, isna
+from pandas import DataFrame, MultiIndex, Index, Series, isna, Timestamp
 from pandas.compat import lrange
 from pandas.util.testing import (
     assert_frame_equal,
     assert_produces_warning,
     assert_series_equal)
+import pytest
 
 
 def test_first_last_nth(df):
@@ -219,6 +220,64 @@ def test_nth_multi_index(three_group):
     assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize('data, expected_first, expected_last', [
+    ({'id': ['A'],
+      'time': Timestamp('2012-02-01 14:00:00',
+                        tz='US/Central'),
+      'foo': [1]},
+     {'id': ['A'],
+      'time': Timestamp('2012-02-01 14:00:00',
+                        tz='US/Central'),
+      'foo': [1]},
+     {'id': ['A'],
+      'time': Timestamp('2012-02-01 14:00:00',
+                        tz='US/Central'),
+      'foo': [1]}),
+    ({'id': ['A', 'B', 'A'],
+      'time': [Timestamp('2012-01-01 13:00:00',
+                         tz='America/New_York'),
+               Timestamp('2012-02-01 14:00:00',
+                         tz='US/Central'),
+               Timestamp('2012-03-01 12:00:00',
+                         tz='Europe/London')],
+      'foo': [1, 2, 3]},
+     {'id': ['A', 'B'],
+      'time': [Timestamp('2012-01-01 13:00:00',
+                         tz='America/New_York'),
+               Timestamp('2012-02-01 14:00:00',
+                         tz='US/Central')],
+      'foo': [1, 2]},
+     {'id': ['A', 'B'],
+      'time': [Timestamp('2012-03-01 12:00:00',
+                         tz='Europe/London'),
+               Timestamp('2012-02-01 14:00:00',
+                         tz='US/Central')],
+      'foo': [3, 2]})
+])
+def test_first_last_tz(data, expected_first, expected_last):
+    # GH 15884: first()/last() on a groupby with as_index=False must keep
+    # the timezone of the Timestamp values, both when aggregating the
+    # whole frame and when aggregating only the selected 'time' column.
+
+    df = DataFrame(data)
+
+    result = df.groupby('id', as_index=False).first()
+    expected = DataFrame(expected_first)
+    cols = ['id', 'time', 'foo']
+    assert_frame_equal(result[cols], expected[cols])
+
+    result = df.groupby('id', as_index=False)['time'].first()
+    assert_frame_equal(result, expected[['id', 'time']])
+
+    result = df.groupby('id', as_index=False).last()
+    expected = DataFrame(expected_last)
+    cols = ['id', 'time', 'foo']
+    assert_frame_equal(result[cols], expected[cols])
+
+    result = df.groupby('id', as_index=False)['time'].last()
+    assert_frame_equal(result, expected[['id', 'time']])
+
+
 def test_nth_multi_index_as_expected():
     # PR 9090, related to issue 8979
     # test nth on MultiIndex