Skip to content

Commit 5f5d8fa

Browse files
committed
BUG: Fix ts precision issue with groupby and NaT (pandas-dev#19526)
1 parent 3f3b4e0 commit 5f5d8fa

File tree

3 files changed

+22 -2 lines changed

doc/source/whatsnew/v0.23.0.txt

+1 -1
Original file line numberDiff line numberDiff line change
@@ -548,7 +548,7 @@ Groupby/Resample/Rolling
548548
- Bug in :func:`DataFrame.resample` which silently ignored unsupported (or mistyped) options for ``label``, ``closed`` and ``convention`` (:issue:`19303`)
549549
- Bug in :func:`DataFrame.groupby` where tuples were interpreted as lists of keys rather than as keys (:issue:`17979`, :issue:`18249`)
550550
- Bug in ``transform`` where particular aggregation functions were being incorrectly cast to match the dtype(s) of the grouped data (:issue:`19200`)
551-
-
551+
- Bug in :func:`DataFrame.groupby` where the use of cython aggregation functions was causing timestamps to lose precision (:issue:`19526`)
552552

553553
Sparse
554554
^^^^^^

pandas/core/groupby.py

+1 -1
Original file line numberDiff line numberDiff line change
@@ -2324,7 +2324,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1):
23242324
result = self._transform(
23252325
result, values, labels, func, is_numeric, is_datetimelike)
23262326

2327-
if is_integer_dtype(result):
2327+
if is_integer_dtype(result) and not is_datetimelike:
23282328
mask = result == iNaT
23292329
if mask.any():
23302330
result = result.astype('float64')

pandas/tests/groupby/test_groupby.py

+20
Original file line numberDiff line numberDiff line change
@@ -2758,6 +2758,26 @@ def test_tuple_correct_keyerror(self):
27582758
with tm.assert_raises_regex(KeyError, "(7, 8)"):
27592759
df.groupby((7, 8)).mean()
27602760

2761+
def test_cython_with_timestamp_and_nat(self):
2762+
# https://github.com/pandas-dev/pandas/issues/19526
2763+
ts = pd.Timestamp('2016-10-14 21:00:44.557')
2764+
df = pd.DataFrame({'a': [0, 1], 'b': [ts, pd.NaT]})
2765+
index = pd.Int64Index([0, 1], dtype='int64', name='a')
2766+
2767+
# We will group by a and test the cython aggregations
2768+
expected = pd.DataFrame({'b': [ts, pd.NaT]}, index=index)
2769+
2770+
result = df.groupby('a').max()
2771+
assert_frame_equal(expected, result)
2772+
2773+
result = df.groupby('a').min()
2774+
assert_frame_equal(expected, result)
2775+
2776+
result = df.groupby('a').first()
2777+
assert_frame_equal(expected, result)
2778+
2779+
result = df.groupby('a').last()
2780+
assert_frame_equal(expected, result)
27612781

27622782
def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
27632783
tups = lmap(tuple, df[keys].values)

0 commit comments

Comments
 (0)