BUG: Fix ts precision issue with groupby and NaT (pandas-dev#19526)

jbandlow · jbandlow · commit 5f5d8fa127e9 · 2018-02-03T22:47:29.000-08:00
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -548,7 +548,7 @@ Groupby/Resample/Rolling
 - Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`)
 - Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
 - Bug in ``transform`` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`)
--
+- Bug in :func:`DataFrame.groupby` where cython aggregation functions (e.g. ``min``, ``max``, ``first``, ``last``) caused timestamps to lose precision when the result contained ``NaT`` (:issue:`19526`)
 
 Sparse
 ^^^^^^
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -2324,7 +2324,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1):
             result = self._transform(
                 result, values, labels, func, is_numeric, is_datetimelike)
 
-        if is_integer_dtype(result):
+        if is_integer_dtype(result) and not is_datetimelike:
             mask = result == iNaT
             if mask.any():
                 result = result.astype('float64')
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -2758,6 +2758,26 @@ def test_tuple_correct_keyerror(self):
         with tm.assert_raises_regex(KeyError, "(7, 8)"):
             df.groupby((7, 8)).mean()
 
+    def test_cython_with_timestamp_and_nat(self):
+        # https://github.com/pandas-dev/pandas/issues/19526
+        ts = pd.Timestamp('2016-10-14 21:00:44.557')
+        df = pd.DataFrame({'a': [0, 1], 'b': [ts, pd.NaT]})
+        index = pd.Int64Index([0, 1], dtype='int64', name='a')
+
+        # Group by 'a'; cython aggregations must keep ts precision despite NaT
+        expected = pd.DataFrame({'b': [ts, pd.NaT]}, index=index)
+
+        result = df.groupby('a').max()
+        assert_frame_equal(expected, result)
+
+        result = df.groupby('a').min()
+        assert_frame_equal(expected, result)
+
+        result = df.groupby('a').first()
+        assert_frame_equal(expected, result)
+
+        result = df.groupby('a').last()
+        assert_frame_equal(expected, result)
 
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
     tups = lmap(tuple, df[keys].values)