Skip to content

Commit 63e8f68

Browse files
facaiy authored and jreback committed
BUG: agg() function on groupby dataframe changes dtype of datetime64[ns] column to float64
closes #12821 closes #12941 Author: 颜发才(Yan Facai) <[email protected]> Closes #12992 from ningchi/agg_time_dtype and squashes the following commits: 607a170 [颜发才(Yan Facai)] add whatsnew entry 8d17eed [颜发才(Yan Facai)] BUG: fix GH12941, Operations on NaT returning float instead of datetime64[ns] a949cee [颜发才(Yan Facai)] BUG: fix GH12821, agg() function on groupby dataframe changes dtype of datetime64[ns] column to float64 914ed71 [颜发才(Yan Facai)] add test function
1 parent a292c13 commit 63e8f68

File tree

6 files changed

+68
-5
lines changed

6 files changed

+68
-5
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
.vagrant
1919
.noseids
2020
.ipynb_checkpoints
21+
.tags
2122

2223
# Compiled source #
2324
###################

doc/source/whatsnew/v0.19.0.txt

+6
Original file line numberDiff line numberDiff line change
@@ -960,5 +960,11 @@ Bug Fixes
960960
- Bug in ``Index.union`` returns an incorrect result with a named empty index (:issue:`13432`)
961961
- Bugs in ``Index.difference`` and ``DataFrame.join`` raise in Python3 when using mixed-integer indexes (:issue:`13432`, :issue:`12814`)
962962
- Bug in ``.to_excel()`` when DataFrame contains a MultiIndex which contains a label with a NaN value (:issue:`13511`)
963+
964+
- Bug in ``agg()`` function on groupby dataframe where the dtype of a ``datetime64[ns]`` column was changed to ``float64`` (:issue:`12821`)
965+
966+
- Bug in operations on ``NaT`` returning ``float`` instead of ``datetime64[ns]`` (:issue:`12941`)
967+
963968
- Bug in ``pd.read_csv`` in Python 2.x with non-UTF8 encoded, multi-character separated data (:issue:`3404`)
969+
964970
- Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)

pandas/core/nanops.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
is_integer, is_complex, is_float_dtype,
1818
is_complex_dtype, is_integer_dtype,
1919
is_bool_dtype, is_object_dtype,
20+
is_numeric_dtype,
2021
is_datetime64_dtype, is_timedelta64_dtype,
2122
is_datetime_or_timedelta_dtype,
2223
is_int_or_datetime_dtype, is_any_int_dtype)
@@ -638,11 +639,15 @@ def _maybe_null_out(result, axis, mask):
638639
if axis is not None and getattr(result, 'ndim', False):
639640
null_mask = (mask.shape[axis] - mask.sum(axis)) == 0
640641
if np.any(null_mask):
641-
if np.iscomplexobj(result):
642-
result = result.astype('c16')
642+
if is_numeric_dtype(result):
643+
if np.iscomplexobj(result):
644+
result = result.astype('c16')
645+
else:
646+
result = result.astype('f8')
647+
result[null_mask] = np.nan
643648
else:
644-
result = result.astype('f8')
645-
result[null_mask] = np.nan
649+
# GH12941, use None to auto cast null
650+
result[null_mask] = None
646651
elif result is not tslib.NaT:
647652
null_mask = mask.size - mask.sum()
648653
if null_mask == 0:

pandas/tests/frame/test_timeseries.py

+24
Original file line numberDiff line numberDiff line change
@@ -341,3 +341,27 @@ def test_first_last_valid(self):
341341
empty = DataFrame()
342342
self.assertIsNone(empty.last_valid_index())
343343
self.assertIsNone(empty.first_valid_index())
344+
345+
def test_operation_on_NaT(self):
346+
# Both NaT and Timestamp are in DataFrame.
347+
df = pd.DataFrame({'foo': [pd.NaT, pd.NaT,
348+
pd.Timestamp('2012-05-01')]})
349+
350+
res = df.min()
351+
exp = pd.Series([pd.Timestamp('2012-05-01')], index=["foo"])
352+
tm.assert_series_equal(res, exp)
353+
354+
res = df.max()
355+
exp = pd.Series([pd.Timestamp('2012-05-01')], index=["foo"])
356+
tm.assert_series_equal(res, exp)
357+
358+
# GH12941, only NaTs are in DataFrame.
359+
df = pd.DataFrame({'foo': [pd.NaT, pd.NaT]})
360+
361+
res = df.min()
362+
exp = pd.Series([pd.NaT], index=["foo"])
363+
tm.assert_series_equal(res, exp)
364+
365+
res = df.max()
366+
exp = pd.Series([pd.NaT], index=["foo"])
367+
tm.assert_series_equal(res, exp)

pandas/tests/test_groupby.py

+26
Original file line numberDiff line numberDiff line change
@@ -720,6 +720,32 @@ def test_agg_period_index(self):
720720
grouped = df.groupby(df.index.month)
721721
list(grouped)
722722

723+
def test_agg_dict_parameter_cast_result_dtypes(self):
724+
# GH 12821
725+
726+
df = DataFrame(
727+
{'class': ['A', 'A', 'B', 'B', 'C', 'C', 'D', 'D'],
728+
'time': date_range('1/1/2011', periods=8, freq='H')})
729+
df.loc[[0, 1, 2, 5], 'time'] = None
730+
731+
# test for `first` function
732+
exp = df.loc[[0, 3, 4, 6]].set_index('class')
733+
grouped = df.groupby('class')
734+
assert_frame_equal(grouped.first(), exp)
735+
assert_frame_equal(grouped.agg('first'), exp)
736+
assert_frame_equal(grouped.agg({'time': 'first'}), exp)
737+
assert_series_equal(grouped.time.first(), exp['time'])
738+
assert_series_equal(grouped.time.agg('first'), exp['time'])
739+
740+
# test for `last` function
741+
exp = df.loc[[0, 3, 4, 7]].set_index('class')
742+
grouped = df.groupby('class')
743+
assert_frame_equal(grouped.last(), exp)
744+
assert_frame_equal(grouped.agg('last'), exp)
745+
assert_frame_equal(grouped.agg({'time': 'last'}), exp)
746+
assert_series_equal(grouped.time.last(), exp['time'])
747+
assert_series_equal(grouped.time.agg('last'), exp['time'])
748+
723749
def test_agg_must_agg(self):
724750
grouped = self.df.groupby('A')['C']
725751
self.assertRaises(Exception, grouped.agg, lambda x: x.describe())

pandas/types/cast.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,8 @@ def trans(x): # noqa
122122
return new_result
123123

124124
# a datetimelike
125-
elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i']:
125+
# GH12821, iNaT is cast to float
126+
elif dtype.kind in ['M', 'm'] and result.dtype.kind in ['i', 'f']:
126127
try:
127128
result = result.astype(dtype)
128129
except:

0 commit comments

Comments
 (0)