From 46943e0c43b836e559644a3e95e2ffde9fedb78c Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 11 Feb 2019 23:05:12 -0800
Subject: [PATCH 01/10] BUG: Groupby.agg cannot reduce with tz aware data

---
 pandas/_libs/reduction.pyx                   |  3 ++-
 pandas/tests/groupby/aggregate/test_other.py | 15 +++++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 507567cf480d7..28a67fb79ae2b 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -342,7 +342,8 @@ cdef class SeriesGrouper:
             index = None
         else:
             values = dummy.values
-            if dummy.dtype != self.arr.dtype:
+            if (dummy.dtype != self.arr.dtype
+                    and values.dtype != self.arr.dtype):
                 raise ValueError('Dummy array must be same dtype')
             if not values.flags.contiguous:
                 values = values.copy()
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index b5214b11bddcc..cacfdb7694de1 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -512,3 +512,18 @@ def test_agg_list_like_func():
     expected = pd.DataFrame({'A': [str(x) for x in range(3)],
                              'B': [[str(x)] for x in range(3)]})
     tm.assert_frame_equal(result, expected)
+
+
+def test_agg_lambda_with_timezone():
+    # GH 23683: agg with a lambda must be able to reduce tz-aware data
+    df = pd.DataFrame({
+        'tag': [1, 1],
+        'date': [
+            pd.Timestamp('2018-01-01', tz='UTC'),
+            pd.Timestamp('2018-01-02', tz='UTC')]
+    })
+    result = df.groupby('tag').agg({'date': lambda e: e.head(1)})
+    expected = pd.DataFrame([pd.Timestamp('2018-01-01', tz='UTC')],
+                            index=pd.Index([1], name='tag'),
+                            columns=['date'])
+    tm.assert_frame_equal(result, expected)

From 724a69e9a2ac35fe0301d8b0127e79534dd99a48 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 12 Feb 2019 16:32:13 -0800
Subject: [PATCH 02/10] Handle output always as UTC

---
 pandas/_libs/reduction.pyx     |  1 +
 pandas/core/groupby/groupby.py | 14 +++++++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx
index 28a67fb79ae2b..517d59c399179 100644
--- a/pandas/_libs/reduction.pyx
+++ b/pandas/_libs/reduction.pyx
@@ -342,6 +342,7 @@ cdef class SeriesGrouper:
             index = None
         else:
             values = dummy.values
+            # GH 23683: datetimetz types are equivalent to datetime types here
             if (dummy.dtype != self.arr.dtype
                     and values.dtype != self.arr.dtype):
                 raise ValueError('Dummy array must be same dtype')
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index c63bc5164e25b..ea911b75784b3 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -26,7 +26,9 @@ class providing the base-class of operations.
 
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
 from pandas.core.dtypes.common import (
-    ensure_float, is_extension_array_dtype, is_numeric_dtype, is_scalar)
+    ensure_float, is_extension_array_dtype, is_datetime64tz_dtype,
+    is_numeric_dtype, is_scalar)
+from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.missing import isna, notna
 
 import pandas.core.algorithms as algorithms
@@ -768,7 +770,17 @@ def _try_cast(self, result, obj, numeric_only=False):
                 # The function can return something of any type, so check
                 # if the type is compatible with the calling EA.
                 try:
+                    tz_dtype = None
+                    if is_datetime64tz_dtype(dtype):
+                        # GH 23683
+                        # Prior results were generated in UTC. Ensure
+                        # We localize to UTC first before converting to
+                        # the target timezone
+                        tz_dtype = dtype
+                        dtype = DatetimeTZDtype(tz='UTC')
                     result = obj._values._from_sequence(result, dtype=dtype)
+                    if tz_dtype is not None:
+                        result = result.astype(tz_dtype)
                 except Exception:
                     # https://github.com/pandas-dev/pandas/issues/22850
                     # pandas has no control over what 3rd-party ExtensionArrays

From 0d1eb5541888ef0ce37f8eeb8f31500adfab234e Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Wed, 13 Feb 2019 11:54:35 -0800
Subject: [PATCH 03/10] Add whatsnew

---
 doc/source/whatsnew/v0.25.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 95362521f3b9f..86e4a47d3b3a9 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -172,7 +172,7 @@ Plotting
 Groupby/Resample/Rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
--
+- Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying an aggregation function to timezone aware data (:issue:`23683`)
 -
 -
 

From b4913dc92fa9c095b304a2a5298b4d3769dd0fda Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Wed, 13 Feb 2019 13:05:03 -0800
Subject: [PATCH 04/10] isort and add another fixed groupby.first/last issue

---
 doc/source/whatsnew/v0.25.0.rst  |  2 +-
 pandas/core/groupby/groupby.py   |  2 +-
 pandas/tests/groupby/test_nth.py | 20 ++++++++++++++++++++
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 86e4a47d3b3a9..7d0b683a631ea 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -173,7 +173,7 @@ Groupby/Resample/Rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
 - Bug in :func:`pandas.core.groupby.GroupBy.agg` when applying an aggregation function to timezone aware data (:issue:`23683`)
--
+- Bug in :func:`pandas.core.groupby.GroupBy.first` and :func:`pandas.core.groupby.GroupBy.last` where timezone information would be dropped (:issue:`21603`)
 -
 
 
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index ea911b75784b3..77edd155125f2 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -26,7 +26,7 @@ class providing the base-class of operations.
 
 from pandas.core.dtypes.cast import maybe_downcast_to_dtype
 from pandas.core.dtypes.common import (
-    ensure_float, is_extension_array_dtype, is_datetime64tz_dtype,
+    ensure_float, is_datetime64tz_dtype, is_extension_array_dtype,
     is_numeric_dtype, is_scalar)
 from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.missing import isna, notna
diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py
index 255d9a8acf2d0..7a3d189d3020e 100644
--- a/pandas/tests/groupby/test_nth.py
+++ b/pandas/tests/groupby/test_nth.py
@@ -278,6 +278,26 @@ def test_first_last_tz(data, expected_first, expected_last):
     assert_frame_equal(result, expected[['id', 'time']])
 
 
+@pytest.mark.parametrize('method, ts, alpha', [
+    ['first', Timestamp('2013-01-01', tz='US/Eastern'), 'a'],
+    ['last', Timestamp('2013-01-02', tz='US/Eastern'), 'b']
+])
+def test_first_last_tz_multi_column(method, ts, alpha):
+    # GH 21603: first/last must not drop the timezone information
+    df = pd.DataFrame({'group': [1, 1, 2],
+                       'category_string': pd.Series(list('abc')).astype(
+                           'category'),
+                       'datetimetz': pd.date_range('20130101', periods=3,
+                                                   tz='US/Eastern')})
+    result = getattr(df.groupby('group'), method)()
+    expected = pd.DataFrame({'category_string': [alpha, 'c'],
+                             'datetimetz': [ts,
+                                            Timestamp('2013-01-03',
+                                                      tz='US/Eastern')]},
+                            index=pd.Index([1, 2], name='group'))
+    assert_frame_equal(result, expected)
+
+
 def test_nth_multi_index_as_expected():
     # PR 9090, related to issue 8979
     # test nth on MultiIndex

From da26b7b9ed85e03e369c48639c1bac73845bd766 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sat, 16 Feb 2019 15:36:18 -0800
Subject: [PATCH 05/10] Bring datetimetz condition to a higher level in _try_cast

---
 pandas/core/groupby/groupby.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 77edd155125f2..e8852e2f04b2c 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -766,21 +766,18 @@ def _try_cast(self, result, obj, numeric_only=False):
             dtype = obj.dtype
 
         if not is_scalar(result):
-            if is_extension_array_dtype(dtype):
+            if is_datetime64tz_dtype(dtype):
+                # GH 23683
+                # Prior results were generated in UTC. Ensure we localize to
+                # UTC first before converting to the target timezone
+                utc_dtype = DatetimeTZDtype(tz='UTC')
+                result = obj._values._from_sequence(result, dtype=utc_dtype)
+                result = result.astype(dtype)
+            elif is_extension_array_dtype(dtype):
                 # The function can return something of any type, so check
                 # if the type is compatible with the calling EA.
                 try:
-                    tz_dtype = None
-                    if is_datetime64tz_dtype(dtype):
-                        # GH 23683
-                        # Prior results were generated in UTC. Ensure
-                        # We localize to UTC first before converting to
-                        # the target timezone
-                        tz_dtype = dtype
-                        dtype = DatetimeTZDtype(tz='UTC')
                     result = obj._values._from_sequence(result, dtype=dtype)
-                    if tz_dtype is not None:
-                        result = result.astype(tz_dtype)
                 except Exception:
                     # https://github.com/pandas-dev/pandas/issues/22850
                     # pandas has no control over what 3rd-party ExtensionArrays

From 60adcf066b392eab91bb5af5c3ae14e0d6b46d9d Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sun, 17 Feb 2019 23:17:26 -0800
Subject: [PATCH 06/10] Add try/except TypeError around tz cast in _try_cast

---
 pandas/core/groupby/groupby.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index e8852e2f04b2c..c2afc7485f072 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -768,11 +768,16 @@ def _try_cast(self, result, obj, numeric_only=False):
         if not is_scalar(result):
             if is_datetime64tz_dtype(dtype):
                 # GH 23683
-                # Prior results were generated in UTC. Ensure we localize to
-                # UTC first before converting to the target timezone
-                utc_dtype = DatetimeTZDtype(tz='UTC')
-                result = obj._values._from_sequence(result, dtype=utc_dtype)
-                result = result.astype(dtype)
+                # Prior results _may_ have been generated in UTC.
+                # Ensure we localize to UTC first before converting
+                # to the target timezone
+                try:
+                    utc_dtype = DatetimeTZDtype(tz='UTC')
+                    result = obj._values._from_sequence(result,
+                                                        dtype=utc_dtype)
+                    result = result.astype(dtype)
+                except TypeError:
+                    pass
             elif is_extension_array_dtype(dtype):
                 # The function can return something of any type, so check
                 # if the type is compatible with the calling EA.

From 206303ab07d8ffb4ee53db4892ed9a957481e967 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Tue, 19 Feb 2019 15:25:03 -0800
Subject: [PATCH 07/10] Add comments

---
 pandas/core/groupby/groupby.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index c2afc7485f072..ca5c3f3711fdb 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -777,6 +777,8 @@ def _try_cast(self, result, obj, numeric_only=False):
                                                         dtype=utc_dtype)
                     result = result.astype(dtype)
                 except TypeError:
+                    # _try_cast was called at a point where the result
+                    # was already tz-aware
                     pass
             elif is_extension_array_dtype(dtype):
                 # The function can return something of any type, so check

From 0c0b43a0417ff2629e6eb3c0d2fcad16f275b48a Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sun, 24 Feb 2019 11:39:05 -0800
Subject: [PATCH 08/10] Don't pass the utc_dtype explicitly

---
 pandas/core/groupby/groupby.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index ca5c3f3711fdb..dc8b8d520d2d3 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -772,10 +772,8 @@ def _try_cast(self, result, obj, numeric_only=False):
                 # Ensure we localize to UTC first before converting
                 # to the target timezone
                 try:
-                    utc_dtype = DatetimeTZDtype(tz='UTC')
-                    result = obj._values._from_sequence(result,
-                                                        dtype=utc_dtype)
-                    result = result.astype(dtype)
+                    result = obj._values._from_sequence(result)
+                    result = result.tz_localize('UTC').tz_convert(dtype.tz)
                 except TypeError:
                     # _try_cast was called at a point where the result
                     # was already tz-aware

From 5bf07a99d74cdcb7aa994ce8adaf95c71ac4da63 Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Sun, 24 Feb 2019 13:52:30 -0800
Subject: [PATCH 09/10] Remove unused import

---
 pandas/core/groupby/groupby.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index dc8b8d520d2d3..cb2cf7b9e4e21 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -28,7 +28,6 @@ class providing the base-class of operations.
 from pandas.core.dtypes.common import (
     ensure_float, is_datetime64tz_dtype, is_extension_array_dtype,
     is_numeric_dtype, is_scalar)
-from pandas.core.dtypes.dtypes import DatetimeTZDtype
 from pandas.core.dtypes.missing import isna, notna
 
 import pandas.core.algorithms as algorithms

From 95a48d67ad76993ad93ab7ba6236d2577a9e3a1c Mon Sep 17 00:00:00 2001
From: Matt Roeschke <mroeschke@housecanary.com>
Date: Mon, 25 Feb 2019 21:57:46 -0800
Subject: [PATCH 10/10] Use string dtype instead

---
 pandas/core/groupby/groupby.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index cb2cf7b9e4e21..057f0484a86c4 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -771,8 +771,10 @@ def _try_cast(self, result, obj, numeric_only=False):
                 # Ensure we localize to UTC first before converting
                 # to the target timezone
                 try:
-                    result = obj._values._from_sequence(result)
-                    result = result.tz_localize('UTC').tz_convert(dtype.tz)
+                    result = obj._values._from_sequence(
+                        result, dtype='datetime64[ns, UTC]'
+                    )
+                    result = result.astype(dtype)
                 except TypeError:
                     # _try_cast was called at a point where the result
                     # was already tz-aware