BUG: first/last lose timezone in groupby with as_index=False (#21573)

reidy-p · jreback · commit c6347c4c4fcc · 2018-06-22T19:01:39.000-04:00
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -226,7 +226,7 @@ Plotting
 Groupby/Resample/Rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
--
+- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` with ``as_index=False`` leading to the loss of timezone information (:issue:`15884`)
 -
 -
 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -4740,7 +4740,7 @@ def _wrap_transformed_output(self, output, names=None):
 
     def _wrap_agged_blocks(self, items, blocks):
         if not self.as_index:
-            index = np.arange(blocks[0].values.shape[1])
+            index = np.arange(blocks[0].values.shape[-1])
             mgr = BlockManager(blocks, [items, index])
             result = DataFrame(mgr)
 
diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py
@@ -1,11 +1,12 @@
 import numpy as np
 import pandas as pd
-from pandas import DataFrame, MultiIndex, Index, Series, isna
+from pandas import DataFrame, MultiIndex, Index, Series, isna, Timestamp
 from pandas.compat import lrange
 from pandas.util.testing import (
     assert_frame_equal,
     assert_produces_warning,
     assert_series_equal)
+import pytest
 
 
 def test_first_last_nth(df):
@@ -219,6 +220,64 @@ def test_nth_multi_index(three_group):
     assert_frame_equal(result, expected)
 
 
+@pytest.mark.parametrize('data, expected_first, expected_last', [
+    ({'id': ['A'],
+      'time': Timestamp('2012-02-01 14:00:00',
+                        tz='US/Central'),
+      'foo': [1]},
+     {'id': ['A'],
+      'time': Timestamp('2012-02-01 14:00:00',
+                        tz='US/Central'),
+      'foo': [1]},
+     {'id': ['A'],
+      'time': Timestamp('2012-02-01 14:00:00',
+                        tz='US/Central'),
+      'foo': [1]}),
+    ({'id': ['A', 'B', 'A'],
+      'time': [Timestamp('2012-01-01 13:00:00',
+                         tz='America/New_York'),
+               Timestamp('2012-02-01 14:00:00',
+                         tz='US/Central'),
+               Timestamp('2012-03-01 12:00:00',
+                         tz='Europe/London')],
+      'foo': [1, 2, 3]},
+     {'id': ['A', 'B'],
+      'time': [Timestamp('2012-01-01 13:00:00',
+                         tz='America/New_York'),
+               Timestamp('2012-02-01 14:00:00',
+                         tz='US/Central')],
+      'foo': [1, 2]},
+     {'id': ['A', 'B'],
+      'time': [Timestamp('2012-03-01 12:00:00',
+                         tz='Europe/London'),
+               Timestamp('2012-02-01 14:00:00',
+                         tz='US/Central')],
+      'foo': [3, 2]})
+])
+def test_first_last_tz(data, expected_first, expected_last):
+    # GH15884: first()/last() on a groupby with as_index=False must keep
+    # the timezone of 'time'; checked for the whole frame and for the
+    # single selected column (one-tz case and mixed-tz case above).
+
+    df = DataFrame(data)
+
+    result = df.groupby('id', as_index=False).first()
+    expected = DataFrame(expected_first)
+    cols = ['id', 'time', 'foo']
+    assert_frame_equal(result[cols], expected[cols])
+
+    result = df.groupby('id', as_index=False)['time'].first()
+    assert_frame_equal(result, expected[['id', 'time']])
+
+    result = df.groupby('id', as_index=False).last()
+    expected = DataFrame(expected_last)
+    cols = ['id', 'time', 'foo']
+    assert_frame_equal(result[cols], expected[cols])
+
+    result = df.groupby('id', as_index=False)['time'].last()
+    assert_frame_equal(result, expected[['id', 'time']])
+
+
 def test_nth_multi_index_as_expected():
     # PR 9090, related to issue 8979
     # test nth on MultiIndex

Original file line number	Diff line number	Diff line change
`@@ -226,7 +226,7 @@ Plotting`
`226`	`226`	`Groupby/Resample/Rolling`
`227`	`227`	`^^^^^^^^^^^^^^^^^^^^^^^^`
`228`	`228`
`229`		`--`
	`229`	+- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` with ``as_index=False`` leading to the loss of timezone information (:issue:`15884`)
`230`	`230`	`-`
`231`	`231`	`-`
`232`	`232`