24
24
is_bool_dtype ,
25
25
is_scalar ,
26
26
is_list_like ,
27
+ needs_i8_conversion ,
27
28
_ensure_float64 ,
28
29
_ensure_platform_int ,
29
30
_ensure_int64 ,
@@ -1844,15 +1845,21 @@ def _cython_operation(self, kind, values, how, axis):
1844
1845
"supported for the 'how' argument" )
1845
1846
out_shape = (self .ngroups ,) + values .shape [1 :]
1846
1847
1848
+ is_datetimelike = needs_i8_conversion (values .dtype )
1847
1849
is_numeric = is_numeric_dtype (values .dtype )
1848
1850
1849
- if is_datetime_or_timedelta_dtype ( values . dtype ) :
1851
+ if is_datetimelike :
1850
1852
values = values .view ('int64' )
1851
1853
is_numeric = True
1852
1854
elif is_bool_dtype (values .dtype ):
1853
1855
values = _ensure_float64 (values )
1854
1856
elif is_integer_dtype (values ):
1855
- values = values .astype ('int64' , copy = False )
1857
+ # we use iNaT for the missing value on ints
1858
+ # so pre-convert to guard this condition
1859
+ if (values == tslib .iNaT ).any ():
1860
+ values = _ensure_float64 (values )
1861
+ else :
1862
+ values = values .astype ('int64' , copy = False )
1856
1863
elif is_numeric and not is_complex_dtype (values ):
1857
1864
values = _ensure_float64 (values )
1858
1865
else :
@@ -1881,20 +1888,17 @@ def _cython_operation(self, kind, values, how, axis):
1881
1888
fill_value = np .nan )
1882
1889
counts = np .zeros (self .ngroups , dtype = np .int64 )
1883
1890
result = self ._aggregate (
1884
- result , counts , values , labels , func , is_numeric )
1891
+ result , counts , values , labels , func ,
1892
+ is_numeric , is_datetimelike )
1885
1893
elif kind == 'transform' :
1886
1894
result = _maybe_fill (np .empty_like (values , dtype = out_dtype ),
1887
1895
fill_value = np .nan )
1888
1896
1889
1897
# temporary storage for running-total type transforms
1890
1898
accum = np .empty (out_shape , dtype = out_dtype )
1891
1899
result = self ._transform (
1892
- result , accum , values , labels , func , is_numeric )
1893
-
1894
- if is_integer_dtype (result ):
1895
- if len (result [result == tslib .iNaT ]) > 0 :
1896
- result = result .astype ('float64' )
1897
- result [result == tslib .iNaT ] = np .nan
1900
+ result , accum , values , labels , func ,
1901
+ is_numeric , is_datetimelike )
1898
1902
1899
1903
if kind == 'aggregate' and \
1900
1904
self ._filter_empty_groups and not counts .all ():
@@ -1929,8 +1933,19 @@ def aggregate(self, values, how, axis=0):
1929
1933
def transform(self, values, how, axis=0):
    """Run the cython 'transform' operation named by ``how`` on ``values``.

    Thin wrapper: forwards ``values``, ``how`` and ``axis`` unchanged to
    ``self._cython_operation`` with the kind fixed to ``'transform'`` and
    returns whatever that call returns.
    """
    kind = 'transform'
    return self._cython_operation(kind, values, how, axis)
1931
1935
1936
def _maybe_mask_missing(self, result, is_datetimelike):
    """Replace the iNaT missing-value marker in integer results with NaN.

    ``result`` is returned untouched when it is not of integer dtype,
    when ``is_datetimelike`` is true, or when it contains no entry equal
    to ``tslib.iNaT``.  Otherwise it is converted to float64 and every
    position that held ``tslib.iNaT`` is set to ``np.nan``.
    """
    # iNaT only acts as a "missing" sentinel for plain integers; for
    # datetimelike data the same value is left in place.
    if not is_integer_dtype(result) or is_datetimelike:
        return result

    missing = result == tslib.iNaT
    if not missing.any():
        return result

    # integers cannot hold NaN, so switch to float64 before masking
    as_float = result.astype('float64')
    as_float[missing] = np.nan
    return as_float
1945
+
1932
1946
def _aggregate (self , result , counts , values , comp_ids , agg_func ,
1933
- is_numeric ):
1947
+ is_numeric , is_datetimelike ):
1948
+
1934
1949
if values .ndim > 3 :
1935
1950
# punting for now
1936
1951
raise NotImplementedError ("number of dimensions is currently "
@@ -1943,11 +1958,12 @@ def _aggregate(self, result, counts, values, comp_ids, agg_func,
1943
1958
else :
1944
1959
agg_func (result , counts , values , comp_ids )
1945
1960
1946
- return result
1961
+ return self . _maybe_mask_missing ( result , is_datetimelike )
1947
1962
1948
1963
def _transform (self , result , accum , values , comp_ids , transform_func ,
1949
- is_numeric ):
1964
+ is_numeric , is_datetimelike ):
1950
1965
comp_ids , _ , ngroups = self .group_info
1966
+
1951
1967
if values .ndim > 3 :
1952
1968
# punting for now
1953
1969
raise NotImplementedError ("number of dimensions is currently "
@@ -1961,7 +1977,7 @@ def _transform(self, result, accum, values, comp_ids, transform_func,
1961
1977
else :
1962
1978
transform_func (result , values , comp_ids , accum )
1963
1979
1964
- return result
1980
+ return self . _maybe_mask_missing ( result , is_datetimelike )
1965
1981
1966
1982
def agg_series (self , obj , func ):
1967
1983
try :
0 commit comments