From daaddad7f8eab1394ef28259b5412d23fae88578 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 15 Nov 2019 09:50:08 -0800
Subject: [PATCH 01/14] REF: make _aggregate_series_pure_python extraction
 behave like the cython version

---
 pandas/core/groupby/groupby.py | 17 ++---------------
 pandas/core/groupby/ops.py     | 12 +++++++++---
 2 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 280f1e88b0ea8..74e9f50ac4d61 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -789,20 +789,7 @@ def _try_cast(self, result, obj, numeric_only: bool = False):
             dtype = obj.dtype
 
         if not is_scalar(result):
-            if is_datetime64tz_dtype(dtype):
-                # GH 23683
-                # Prior results _may_ have been generated in UTC.
-                # Ensure we localize to UTC first before converting
-                # to the target timezone
-                arr = extract_array(obj)
-                try:
-                    result = arr._from_sequence(result, dtype="datetime64[ns, UTC]")
-                    result = result.astype(dtype)
-                except TypeError:
-                    # _try_cast was called at a point where the result
-                    # was already tz-aware
-                    pass
-            elif is_extension_array_dtype(dtype):
+            if is_extension_array_dtype(dtype):
                 # The function can return something of any type, so check
                 # if the type is compatible with the calling EA.
 
@@ -869,7 +856,7 @@ def _cython_agg_general(
             if numeric_only and not is_numeric:
                 continue
 
-            result, names = self.grouper.aggregate(obj.values, how, min_count=min_count)
+            result, names = self.grouper.aggregate(obj._values, how, min_count=min_count)
             output[name] = self._try_cast(result, obj)
 
         if len(output) == 0:
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 7ed79e4b00371..3e8befcfe9a0a 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -604,11 +604,11 @@ def agg_series(self, obj: Series, func):
             # SeriesGrouper would raise if we were to call _aggregate_series_fast
             return self._aggregate_series_pure_python(obj, func)
 
-        elif is_extension_array_dtype(obj.dtype) and obj.dtype.kind != "M":
+        elif is_extension_array_dtype(obj.dtype):
             # _aggregate_series_fast would raise TypeError when
             #  calling libreduction.Slider
+            # In the datetime64tz case it would incorrectly cast to tz-naive
             # TODO: can we get a performant workaround for EAs backed by ndarray?
-            # TODO: is the datetime64tz case supposed to go through here?
             return self._aggregate_series_pure_python(obj, func)
 
         elif isinstance(obj.index, MultiIndex):
@@ -657,7 +657,13 @@ def _aggregate_series_pure_python(self, obj: Series, func):
             res = func(group)
             if result is None:
                 if isinstance(res, (Series, Index, np.ndarray)):
-                    raise ValueError("Function does not reduce")
+                    if len(res) == 1:
+                        # e.g. test_agg_lambda_with_timezone lambda e: e.head(1)
+                        # FIXME: are we potentially losing import res.index info?
+                        res = getattr(res, "_values", res)
+                        res = res[0]
+                    else:
+                        raise ValueError("Function does not reduce")
                 result = np.empty(ngroups, dtype="O")
 
             counts[label] = group.shape[0]

From 18bb6c979196230442868a797ee273aff0c9668a Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 15 Nov 2019 10:05:30 -0800
Subject: [PATCH 02/14] blackify

---
 pandas/core/groupby/groupby.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 74e9f50ac4d61..d2d8c699eda62 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -856,7 +856,9 @@ def _cython_agg_general(
             if numeric_only and not is_numeric:
                 continue
 
-            result, names = self.grouper.aggregate(obj._values, how, min_count=min_count)
+            result, names = self.grouper.aggregate(
+                obj._values, how, min_count=min_count
+            )
             output[name] = self._try_cast(result, obj)
 
         if len(output) == 0:

From 201bc27fc58a73bac54882f16efc648d56fc1ebc Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Fri, 15 Nov 2019 15:32:49 -0800
Subject: [PATCH 03/14] add tests

---
 pandas/core/groupby/groupby.py               |  8 ++++----
 pandas/tests/groupby/aggregate/test_other.py | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index d2d8c699eda62..39d094494bad2 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -31,7 +31,6 @@ class providing the base-class of operations.
 from pandas.core.dtypes.common import (
     ensure_float,
     is_datetime64_dtype,
-    is_datetime64tz_dtype,
     is_extension_array_dtype,
     is_integer_dtype,
     is_numeric_dtype,
@@ -45,7 +44,6 @@ class providing the base-class of operations.
 from pandas.core.arrays import Categorical, try_cast_to_ea
 from pandas.core.base import DataError, PandasObject, SelectionMixin
 import pandas.core.common as com
-from pandas.core.construction import extract_array
 from pandas.core.frame import DataFrame
 from pandas.core.generic import NDFrame
 from pandas.core.groupby import base
@@ -789,9 +787,11 @@ def _try_cast(self, result, obj, numeric_only: bool = False):
             dtype = obj.dtype
 
         if not is_scalar(result):
-            if is_extension_array_dtype(dtype):
+            if is_extension_array_dtype(dtype) and dtype.kind != "M":
                 # The function can return something of any type, so check
-                # if the type is compatible with the calling EA.
+                #  if the type is compatible with the calling EA.
+                # datetime64tz is handled correctly in agg_series,
+                #  so is excluded here.
 
                 # return the same type (Series) as our caller
                 cls = dtype.construct_array_type()
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 1c297f3e2ada3..296e0e61d68f9 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -454,6 +454,26 @@ def test_agg_over_numpy_arrays():
     tm.assert_frame_equal(result, expected)
 
 
+def test_agg_tzaware_non_datetime_result():
+    # discussed in GH#29589, fixed in GH#29641, operating on tzaware values
+    #  with function that is not dtype-preserving
+    dti = pd.date_range("2012-01-01", periods=4, tz="UTC")
+    df = pd.DataFrame({"a": [0, 0, 1, 1], "b": dti})
+    gb = df.groupby("a")
+
+    # Case that _does_ preserve the dtype
+    result = gb["b"].agg(lambda x: x.iloc[0])
+    expected = pd.Series(dti[::2], name="b")
+    expected.index.name = "a"
+    tm.assert_series_equal(result, expected)
+
+    # Case that does _not_ preserve the dtype
+    result = gb["b"].agg(lambda x: x.iloc[0].year)
+    expected = pd.Series([2012, 2012], name="b")
+    expected.index.name = "a"
+    tm.assert_series_equal(result, expected)
+
+
 def test_agg_timezone_round_trip():
     # GH 15426
     ts = pd.Timestamp("2016-01-01 12:00:00", tz="US/Pacific")

From 61f32e5096dec1727733cbc2deab2cf1423530b7 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sat, 16 Nov 2019 07:57:01 -0800
Subject: [PATCH 04/14] added test

---
 pandas/tests/groupby/aggregate/test_other.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 296e0e61d68f9..721045f1097f8 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -467,12 +467,17 @@ def test_agg_tzaware_non_datetime_result():
     expected.index.name = "a"
     tm.assert_series_equal(result, expected)
 
-    # Case that does _not_ preserve the dtype
+    # Cases that do _not_ preserve the dtype
     result = gb["b"].agg(lambda x: x.iloc[0].year)
     expected = pd.Series([2012, 2012], name="b")
     expected.index.name = "a"
     tm.assert_series_equal(result, expected)
 
+    result = gb["b"].agg(lambda x: x.iloc[-1] - x.iloc[0])
+    expected = pd.Series([pd.Timedelta(days=1), pd.Timedelta(days=1)], name="b")
+    expected.index.name = "a"
+    tm.assert_series_equal(result, expected)
+
 
 def test_agg_timezone_round_trip():
     # GH 15426

From fb9f1714a27f65820bce013b470780467c9a6d6d Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sat, 16 Nov 2019 14:32:44 -0800
Subject: [PATCH 05/14] avoid _values getattr pattern

---
 pandas/core/groupby/ops.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 3e8befcfe9a0a..4b52dd752deba 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -660,8 +660,9 @@ def _aggregate_series_pure_python(self, obj: Series, func):
                     if len(res) == 1:
                         # e.g. test_agg_lambda_with_timezone lambda e: e.head(1)
                         # FIXME: are we potentially losing import res.index info?
-                        res = getattr(res, "_values", res)
-                        res = res[0]
+
+                        # For non-Series we could just do `res[0]`
+                        res = next(iter(res))
                     else:
                         raise ValueError("Function does not reduce")
                 result = np.empty(ngroups, dtype="O")

From d542515e3c1d77d1cf37c3008224c83406bb8edb Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sun, 17 Nov 2019 08:02:27 -0800
Subject: [PATCH 06/14] todo comment

---
 pandas/core/groupby/ops.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 4b52dd752deba..47ca2b2190ecf 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -661,6 +661,7 @@ def _aggregate_series_pure_python(self, obj: Series, func):
                         # e.g. test_agg_lambda_with_timezone lambda e: e.head(1)
                         # FIXME: are we potentially losing import res.index info?
 
+                        # TODO: use `.item()` if/when we un-deprecate it.
                         # For non-Series we could just do `res[0]`
                         res = next(iter(res))
                     else:

From 71b21c75f9c161198c0cce58aa869fac7b845908 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sat, 16 Nov 2019 19:32:05 -0800
Subject: [PATCH 07/14] simplify _transform_fast

---
 pandas/core/groupby/generic.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 6376dbefcf435..58a75557c50d8 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -406,7 +406,7 @@ def transform(self, func, *args, **kwargs):
                 # result to the whole group. Compute func result
                 # and deal with possible broadcasting below.
                 return self._transform_fast(
-                    lambda: getattr(self, func)(*args, **kwargs), func
+                    lambda: getattr(self, func)(*args, **kwargs)
                 )
 
         # reg transform
@@ -443,19 +443,13 @@ def transform(self, func, *args, **kwargs):
         result.index = self._selected_obj.index
         return result
 
-    def _transform_fast(self, func, func_nm) -> Series:
+    def _transform_fast(self, func) -> Series:
         """
         fast version of transform, only applicable to
         builtin/cythonizable functions
         """
-        if isinstance(func, str):
-            func = getattr(self, func)
-
         ids, _, ngroup = self.grouper.group_info
-        cast = self._transform_should_cast(func_nm)
         out = algorithms.take_1d(func()._values, ids)
-        if cast:
-            out = self._try_cast(out, self.obj)
         return Series(out, index=self.obj.index, name=self.obj.name)
 
     def filter(self, func, dropna=True, *args, **kwargs):
@@ -1381,6 +1375,8 @@ def _transform_fast(self, result: DataFrame, obj: DataFrame, func_nm) -> DataFra
         output = []
         for i, _ in enumerate(result.columns):
             res = algorithms.take_1d(result.iloc[:, i].values, ids)
+            # TODO: we have no test cases that get here with EA dtypes;
+            #  try_cast is not needed if EAs never get here
             if cast:
                 res = self._try_cast(res, obj.iloc[:, i])
             output.append(res)

From 640710a1adcc730b74844b9ac4a4bb6bf99226c9 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sat, 16 Nov 2019 19:59:20 -0800
Subject: [PATCH 08/14] remove _try_casts

---
 pandas/core/groupby/generic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 58a75557c50d8..76ba48b23b2f4 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1051,12 +1051,12 @@ def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
         if axis != obj._info_axis_number:
             for name, data in self:
                 fres = func(data, *args, **kwargs)
-                result[name] = self._try_cast(fres, data)
+                result[name] = fres
         else:
             for name in self.indices:
                 data = self.get_group(name, obj=obj)
                 fres = func(data, *args, **kwargs)
-                result[name] = self._try_cast(fres, data)
+                result[name] = fres
 
         return self._wrap_frame_output(result, obj)
 

From a0676fbb23f85917498048b35312f406e28ba321 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sun, 17 Nov 2019 08:43:48 -0800
Subject: [PATCH 09/14] comments

---
 pandas/core/groupby/generic.py | 5 +++--
 pandas/core/groupby/grouper.py | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 76ba48b23b2f4..e448a9900efdc 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -406,7 +406,7 @@ def transform(self, func, *args, **kwargs):
                 # result to the whole group. Compute func result
                 # and deal with possible broadcasting below.
                 return self._transform_fast(
-                    lambda: getattr(self, func)(*args, **kwargs)
+                    lambda: getattr(self, func)(*args, **kwargs), func
                 )
 
         # reg transform
@@ -443,7 +443,7 @@ def transform(self, func, *args, **kwargs):
         result.index = self._selected_obj.index
         return result
 
-    def _transform_fast(self, func) -> Series:
+    def _transform_fast(self, func, func_nm) -> Series:
         """
         fast version of transform, only applicable to
         builtin/cythonizable functions
@@ -1109,6 +1109,7 @@ def _decide_output_index(self, output, labels):
 
         return output_keys
 
+    # TODO: this function is way too big
     def _wrap_applied_output(self, keys, values, not_indexed_same=False):
         if len(keys) == 0:
             return DataFrame(index=keys)
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index eb1442aeb8a4c..b739fb706be03 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -245,6 +245,7 @@ class Grouping:
       * group_index : unique groups
       * groups : dict of {group -> label_list}
     """
+    # TODO: too much going on here
 
     def __init__(
         self,
@@ -431,6 +432,7 @@ def groups(self) -> dict:
         return self.index.groupby(Categorical.from_codes(self.codes, self.group_index))
 
 
+# TODO: this function is way too big
 def get_grouper(
     obj: FrameOrSeries,
     key=None,

From ab125ec4cbbcf3417d649e543ebad66ce8aeaf3f Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Sun, 17 Nov 2019 17:38:20 -0800
Subject: [PATCH 10/14] Whatsnew

---
 doc/source/whatsnew/v1.0.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index cb68bd0e762c4..ee954d95df69b 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -438,6 +438,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`)
 - Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`)
 - Bug in :meth:`DataFrameGroupBy.rolling().quantile()` ignoring ``interpolation`` keyword argument (:issue:`28779`)
+- Bug in :meth:`DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrect casting results to the original dtype (:issue:`29641`)
 
 Reshaping
 ^^^^^^^^^

From 96df6d936609692aaeca2873d4e87b1e41fdf142 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 18 Nov 2019 09:23:23 -0800
Subject: [PATCH 11/14] remove comments

---
 pandas/core/groupby/generic.py | 1 -
 pandas/core/groupby/grouper.py | 2 --
 2 files changed, 3 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index e448a9900efdc..dc6be99c07447 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -1109,7 +1109,6 @@ def _decide_output_index(self, output, labels):
 
         return output_keys
 
-    # TODO: this function is way too big
     def _wrap_applied_output(self, keys, values, not_indexed_same=False):
         if len(keys) == 0:
             return DataFrame(index=keys)
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 8d8dc08cce9e0..c37617b1f1f7f 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -245,7 +245,6 @@ class Grouping:
       * group_index : unique groups
       * groups : dict of {group -> label_list}
     """
-    # TODO: too much going on here
 
     def __init__(
         self,
@@ -432,7 +431,6 @@ def groups(self) -> dict:
         return self.index.groupby(Categorical.from_codes(self.codes, self.group_index))
 
 
-# TODO: this function is way too big
 def get_grouper(
     obj: FrameOrSeries,
     key=None,

From dceda2f29ebb8b3c62fa5a3dbc0e4893d40bd627 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 18 Nov 2019 12:07:40 -0800
Subject: [PATCH 12/14] revert parts in other PRs

---
 pandas/core/groupby/generic.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 1712f75b459bb..8b3346ee860e1 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -448,8 +448,14 @@ def _transform_fast(self, func, func_nm) -> Series:
         fast version of transform, only applicable to
         builtin/cythonizable functions
         """
+        if isinstance(func, str):
+            func = getattr(self, func)
+
         ids, _, ngroup = self.grouper.group_info
+        cast = self._transform_should_cast(func_nm)
         out = algorithms.take_1d(func()._values, ids)
+        if cast:
+            out = self._try_cast(out, self.obj)
         return Series(out, index=self.obj.index, name=self.obj.name)
 
     def filter(self, func, dropna=True, *args, **kwargs):
@@ -1369,7 +1375,7 @@ def _transform_fast(self, result: DataFrame, obj: DataFrame, func_nm) -> DataFra
         for i, _ in enumerate(result.columns):
             res = algorithms.take_1d(result.iloc[:, i].values, ids)
             # TODO: we have no test cases that get here with EA dtypes;
-            #  try_cast is not needed if EAs never get here
+            #  try_cast may not be needed if EAs never get here
             if cast:
                 res = self._try_cast(res, obj.iloc[:, i])
             output.append(res)

From 1965e22c3770ecc367d1d5817f6e84dc18402686 Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Mon, 18 Nov 2019 15:05:01 -0800
Subject: [PATCH 13/14] comment

---
 pandas/core/apply.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 071cd116ea982..34a8ed1fa7a83 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -226,6 +226,8 @@ def apply_raw(self):
             if "Function does not reduce" not in str(err):
                 # catch only ValueError raised intentionally in libreduction
                 raise
+            # We expect np.apply_along_axis to give a two-dimensional result, or
+            #  also raise.
             result = np.apply_along_axis(self.f, self.axis, self.values)
 
         # TODO: mixed type case

From 0cccfa51d44e5919d7f2b54299d001354cfc069c Mon Sep 17 00:00:00 2001
From: jbrockmendel <jbrockmendel@gmail.com>
Date: Wed, 20 Nov 2019 17:14:41 -0800
Subject: [PATCH 14/14] remove note

---
 doc/source/whatsnew/v1.0.0.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index 900e5a5fc3d70..db24be628dd67 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -510,7 +510,6 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`)
 - Bug in :meth:`DataFrame.groupby` losing column name information when grouping by a categorical column (:issue:`28787`)
 - Bug in :meth:`DataFrameGroupBy.rolling().quantile()` ignoring ``interpolation`` keyword argument (:issue:`28779`)
-- Bug in :meth:`DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrect casting results to the original dtype (:issue:`29641`)
 - Bug in :meth:`DataFrame.groupby` where ``any``, ``all``, ``nunique`` and transform functions would incorrectly handle duplicate column labels (:issue:`21668`)
 -