From af37225d4a25ece65bdd76d966386171ab924b25 Mon Sep 17 00:00:00 2001
From: Jason Bandlow
Date: Sat, 3 Feb 2018 22:47:01 -0800
Subject: [PATCH] BUG: Fix ts precision issue with groupby and NaT (#19526)

---
 doc/source/whatsnew/v0.23.0.txt               |  2 +-
 pandas/core/groupby.py                        |  2 +-
 pandas/tests/groupby/aggregate/test_cython.py | 20 ++++++++++++++++++-
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 69965f44d87a8..3fa1b195c4f9a 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -548,7 +548,7 @@ Groupby/Resample/Rolling
 - Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`)
 - Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
 - Bug in ``transform`` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`)
--
+- Bug in :func:`DataFrame.groupby` where aggregation by ``first``/``last``/``min``/``max`` was causing timestamps to lose precision (:issue:`19526`)
 
 Sparse
 ^^^^^^
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 2c1deb9db7bba..bf70e089310ff 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -2324,7 +2324,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1):
             result = self._transform(
                 result, values, labels, func, is_numeric, is_datetimelike)
 
-        if is_integer_dtype(result):
+        if is_integer_dtype(result) and not is_datetimelike:
             mask = result == iNaT
             if mask.any():
                 result = result.astype('float64')
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index c8ee05ddbb74f..d20dbaab429ca 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -12,8 +12,10 @@
 from numpy import nan
 import pandas as pd
 
-from pandas import bdate_range, DataFrame, Index, Series
+from pandas import (bdate_range, DataFrame, Index, Series, Timestamp,
+                    Timedelta, NaT)
 from pandas.core.groupby import DataError
+from pandas.core.indexes.numeric import Int64Index
 import pandas.util.testing as tm
 
 
@@ -187,3 +189,19 @@ def test_cython_agg_empty_buckets_nanops():
         {"a": [1, 1, 1716, 1]},
         index=pd.CategoricalIndex(intervals, name='a', ordered=True))
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize('op', ['first', 'last', 'max', 'min'])
+@pytest.mark.parametrize('data', [
+    Timestamp('2016-10-14 21:00:44.557'),
+    Timedelta('17088 days 21:00:44.557'), ])
+def test_cython_with_timestamp_and_nat(op, data):
+    # https://github.com/pandas-dev/pandas/issues/19526
+    df = DataFrame({'a': [0, 1], 'b': [data, NaT]})
+    index = Int64Index([0, 1], dtype='int64', name='a')
+
+    # We will group by a and test the cython aggregations
+    expected = DataFrame({'b': [data, NaT]}, index=index)
+
+    result = df.groupby('a').aggregate(op)
+    tm.assert_frame_equal(expected, result)