Skip to content

Commit 7b5fae9

Browse files
committed
Merge pull request pandas-dev#2985 from jreback/idxmin
BUG: Bug in idxmin/idxmax of ``datetime64[ns]`` Series with ``NaT`` (GH2982)
2 parents 501ba37 + a7cab81 commit 7b5fae9

File tree

6 files changed

+124
-75
lines changed

6 files changed

+124
-75
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ pandas 0.11.0
126126

127127
- Bug on in-place putmasking on an ``integer`` series that needs to be converted to ``float`` (GH2746_)
128128
- Bug in argsort of ``datetime64[ns]`` Series with ``NaT`` (GH2967_)
129+
- Bug in idxmin/idxmax of ``datetime64[ns]`` Series with ``NaT`` (GH2982__)
129130

130131
.. _GH622: https://github.com/pydata/pandas/issues/622
131132
.. _GH797: https://github.com/pydata/pandas/issues/797
@@ -147,6 +148,7 @@ pandas 0.11.0
147148
.. _GH2931: https://github.com/pydata/pandas/issues/2931
148149
.. _GH2973: https://github.com/pydata/pandas/issues/2973
149150
.. _GH2967: https://github.com/pydata/pandas/issues/2967
151+
.. _GH2982: https://github.com/pydata/pandas/issues/2982
150152

151153

152154
pandas 0.10.1

doc/source/io.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1186,7 +1186,7 @@ A query is specified using the ``Term`` class under the hood.
11861186

11871187
Valid terms can be created from ``dict, list, tuple, or
11881188
string``. Objects can be embeded as values. Allowed operations are: ``<,
1189-
<=, >, >=, =``. ``=`` will be inferred as an implicit set operation
1189+
<=, >, >=, =, !=``. ``=`` will be inferred as an implicit set operation
11901190
(e.g. if 2 or more values are provided). The following are all valid
11911191
terms.
11921192

pandas/core/common.py

+31-12
Original file line numberDiff line numberDiff line change
@@ -682,19 +682,30 @@ def _infer_dtype_from_scalar(val):
682682

683683

684684
def _maybe_promote(dtype, fill_value=np.nan):
685+
686+
# if we passed an array here, determine the fill value by dtype
687+
if isinstance(fill_value,np.ndarray):
688+
if issubclass(fill_value.dtype.type, (np.datetime64,np.timedelta64)):
689+
fill_value = tslib.iNaT
690+
else:
691+
fill_value = np.nan
692+
685693
# returns tuple of (dtype, fill_value)
686-
if issubclass(dtype.type, np.datetime64):
694+
if issubclass(dtype.type, (np.datetime64,np.timedelta64)):
687695
# for now: refuse to upcast datetime64
688696
# (this is because datetime64 will not implicitly upconvert
689697
# to object correctly as of numpy 1.6.1)
690698
if isnull(fill_value):
691699
fill_value = tslib.iNaT
692700
else:
693-
try:
694-
fill_value = lib.Timestamp(fill_value).value
695-
except:
696-
# the proper thing to do here would probably be to upcast to
697-
# object (but numpy 1.6.1 doesn't do this properly)
701+
if issubclass(dtype.type, np.datetime64):
702+
try:
703+
fill_value = lib.Timestamp(fill_value).value
704+
except:
705+
# the proper thing to do here would probably be to upcast to
706+
# object (but numpy 1.6.1 doesn't do this properly)
707+
fill_value = tslib.iNaT
708+
else:
698709
fill_value = tslib.iNaT
699710
elif is_float(fill_value):
700711
if issubclass(dtype.type, np.bool_):
@@ -722,24 +733,33 @@ def _maybe_promote(dtype, fill_value=np.nan):
722733
return dtype, fill_value
723734

724735

725-
def _maybe_upcast_putmask(result, mask, other):
736+
def _maybe_upcast_putmask(result, mask, other, dtype=None):
726737
""" a safe version of put mask that (potentially upcasts the result
727738
return the result and a changed flag """
728739
try:
729740
np.putmask(result, mask, other)
730741
except:
731742
# our type is wrong here, need to upcast
732743
if (-mask).any():
733-
result, fill_value = _maybe_upcast(result, copy=True)
744+
result, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
734745
np.putmask(result, mask, other)
735746
return result, True
736747

737748
return result, False
738749

739-
def _maybe_upcast(values, fill_value=np.nan, copy=False):
750+
def _maybe_upcast(values, fill_value=np.nan, dtype=None, copy=False):
740751
""" provide explicty type promotion and coercion
741-
if copy == True, then a copy is created even if no upcast is required """
742-
new_dtype, fill_value = _maybe_promote(values.dtype, fill_value)
752+
753+
Parameters
754+
----------
755+
values : the ndarray that we want to maybe upcast
756+
fill_value : what we want to fill with
757+
dtype : if None, then use the dtype of the values, else coerce to this type
758+
copy : if True always make a copy even if no upcast is required """
759+
760+
if dtype is None:
761+
dtype = values.dtype
762+
new_dtype, fill_value = _maybe_promote(dtype, fill_value)
743763
if new_dtype != values.dtype:
744764
values = values.astype(new_dtype)
745765
elif copy:
@@ -915,7 +935,6 @@ def _possibly_convert_platform(values):
915935

916936
return values
917937

918-
919938
def _possibly_cast_to_timedelta(value, coerce=True):
920939
""" try to cast to timedelta64 w/o coercion """
921940

pandas/core/nanops.py

+63-55
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import pandas.lib as lib
99
import pandas.algos as algos
1010
import pandas.hashtable as _hash
11+
import pandas.tslib as tslib
1112

1213
try:
1314
import bottleneck as bn
@@ -69,8 +70,61 @@ def _has_infs(result):
6970
else:
7071
return np.isinf(result) or np.isneginf(result)
7172

73+
def _get_fill_value(dtype, fill_value=None, fill_value_typ=None):
74+
""" return the correct fill value for the dtype of the values """
75+
if fill_value is not None:
76+
return fill_value
77+
if _na_ok_dtype(dtype):
78+
if fill_value_typ is None:
79+
return np.nan
80+
else:
81+
if fill_value_typ == '+inf':
82+
return np.inf
83+
else:
84+
return -np.inf
85+
else:
86+
if fill_value_typ is None:
87+
return tslib.iNaT
88+
else:
89+
if fill_value_typ == '+inf':
90+
# need the max int here
91+
return np.iinfo(np.int64).max
92+
else:
93+
return tslib.iNaT
94+
95+
def _get_values(values, skipna, fill_value=None, fill_value_typ=None, isfinite=False, copy=True):
96+
""" utility to get the values view, mask, dtype
97+
if necessary copy and mask using the specified fill_value
98+
copy = True will force the copy """
99+
if isfinite:
100+
mask = _isfinite(values)
101+
else:
102+
mask = isnull(values)
103+
104+
dtype = values.dtype
105+
dtype_ok = _na_ok_dtype(dtype)
106+
107+
# get our fill value (in case we need to provide an alternative dtype for it)
108+
fill_value = _get_fill_value(dtype, fill_value=fill_value, fill_value_typ=fill_value_typ)
109+
110+
if skipna:
111+
if copy:
112+
values = values.copy()
113+
if dtype_ok:
114+
np.putmask(values, mask, fill_value)
115+
116+
# promote if needed
117+
else:
118+
values, changed = com._maybe_upcast_putmask(values, mask, fill_value)
119+
120+
elif copy:
121+
values = values.copy()
122+
123+
values = _view_if_needed(values)
124+
return values, mask, dtype
125+
72126
def _isfinite(values):
73-
if issubclass(values.dtype.type, np.timedelta64):
127+
if issubclass(values.dtype.type, (np.timedelta64,np.datetime64)):
74128
return isnull(values)
75129
return -np.isfinite(values)
76130

@@ -99,43 +153,21 @@ def _wrap_results(result,dtype):
99153
return result
100154

101155
def nanany(values, axis=None, skipna=True):
102-
mask = isnull(values)
103-
104-
if skipna:
105-
values = values.copy()
106-
np.putmask(values, mask, False)
156+
values, mask, dtype = _get_values(values, skipna, False, copy=skipna)
107157
return values.any(axis)
108158

109-
110159
def nanall(values, axis=None, skipna=True):
111-
mask = isnull(values)
112-
113-
if skipna:
114-
values = values.copy()
115-
np.putmask(values, mask, True)
160+
values, mask, dtype = _get_values(values, skipna, True, copy=skipna)
116161
return values.all(axis)
117162

118-
119163
def _nansum(values, axis=None, skipna=True):
120-
mask = isnull(values)
121-
122-
if skipna and not issubclass(values.dtype.type, np.integer):
123-
values = values.copy()
124-
np.putmask(values, mask, 0)
125-
164+
values, mask, dtype = _get_values(values, skipna, 0)
126165
the_sum = values.sum(axis)
127166
the_sum = _maybe_null_out(the_sum, axis, mask)
128-
129167
return the_sum
130168

131-
132169
def _nanmean(values, axis=None, skipna=True):
133-
mask = isnull(values)
134-
135-
if skipna and not issubclass(values.dtype.type, np.integer):
136-
values = values.copy()
137-
np.putmask(values, mask, 0)
138-
170+
values, mask, dtype = _get_values(values, skipna, 0)
139171
the_sum = _ensure_numeric(values.sum(axis))
140172
count = _get_counts(mask, axis)
141173

@@ -186,15 +218,7 @@ def _nanvar(values, axis=None, skipna=True, ddof=1):
186218

187219

188220
def _nanmin(values, axis=None, skipna=True):
189-
mask = isnull(values)
190-
191-
dtype = values.dtype
192-
193-
if skipna and _na_ok_dtype(dtype):
194-
values = values.copy()
195-
np.putmask(values, mask, np.inf)
196-
197-
values = _view_if_needed(values)
221+
values, mask, dtype = _get_values(values, skipna, fill_value_typ = '+inf')
198222

199223
# numpy 1.6.1 workaround in Python 3.x
200224
if (values.dtype == np.object_
@@ -218,15 +242,7 @@ def _nanmin(values, axis=None, skipna=True):
218242

219243

220244
def _nanmax(values, axis=None, skipna=True):
221-
mask = isnull(values)
222-
223-
dtype = values.dtype
224-
225-
if skipna and _na_ok_dtype(dtype):
226-
values = values.copy()
227-
np.putmask(values, mask, -np.inf)
228-
229-
values = _view_if_needed(values)
245+
values, mask, dtype = _get_values(values, skipna, fill_value_typ ='-inf')
230246

231247
# numpy 1.6.1 workaround in Python 3.x
232248
if (values.dtype == np.object_
@@ -254,11 +270,7 @@ def nanargmax(values, axis=None, skipna=True):
254270
"""
255271
Returns -1 in the NA case
256272
"""
257-
mask = _isfinite(values)
258-
values = _view_if_needed(values)
259-
if not issubclass(values.dtype.type, np.integer):
260-
values = values.copy()
261-
np.putmask(values, mask, -np.inf)
273+
values, mask, dtype = _get_values(values, skipna, fill_value_typ = '-inf', isfinite=True)
262274
result = values.argmax(axis)
263275
result = _maybe_arg_null_out(result, axis, mask, skipna)
264276
return result
@@ -268,11 +280,7 @@ def nanargmin(values, axis=None, skipna=True):
268280
"""
269281
Returns -1 in the NA case
270282
"""
271-
mask = _isfinite(values)
272-
values = _view_if_needed(values)
273-
if not issubclass(values.dtype.type, np.integer):
274-
values = values.copy()
275-
np.putmask(values, mask, np.inf)
283+
values, mask, dtype = _get_values(values, skipna, fill_value_typ = '+inf', isfinite=True)
276284
result = values.argmin(axis)
277285
result = _maybe_arg_null_out(result, axis, mask, skipna)
278286
return result

pandas/core/series.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2112,13 +2112,13 @@ def argsort(self, axis=0, kind='quicksort', order=None):
21122112
mask = isnull(values)
21132113

21142114
if mask.any():
2115-
result = Series(-1,index=self.index,name=self.name)
2115+
result = Series(-1,index=self.index,name=self.name,dtype='int64')
21162116
notmask = -mask
21172117
result.values[notmask] = np.argsort(self.values[notmask], kind=kind)
21182118
return result
21192119
else:
21202120
return Series(np.argsort(values, kind=kind), index=self.index,
2121-
name=self.name)
2121+
name=self.name,dtype='int64')
21222122

21232123
def rank(self, method='average', na_option='keep', ascending=True):
21242124
"""

pandas/tests/test_series.py

+25-5
Original file line numberDiff line numberDiff line change
@@ -1816,14 +1816,15 @@ def test_timedelta64_functions(self):
18161816
result = td.idxmax()
18171817
self.assert_(result == 2)
18181818

1819-
# with NaT (broken)
1819+
# GH 2982
1820+
# with NaT
18201821
td[0] = np.nan
18211822

1822-
#result = td.idxmin()
1823-
#self.assert_(result == 1)
1823+
result = td.idxmin()
1824+
self.assert_(result == 1)
18241825

1825-
#result = td.idxmax()
1826-
#self.assert_(result == 2)
1826+
result = td.idxmax()
1827+
self.assert_(result == 2)
18271828

18281829
# abs
18291830
s1 = Series(date_range('20120101',periods=3))
@@ -2065,6 +2066,16 @@ def test_idxmin(self):
20652066
allna = self.series * nan
20662067
self.assert_(isnull(allna.idxmin()))
20672068

2069+
# datetime64[ns]
2070+
from pandas import date_range
2071+
s = Series(date_range('20130102',periods=6))
2072+
result = s.idxmin()
2073+
self.assert_(result == 0)
2074+
2075+
s[0] = np.nan
2076+
result = s.idxmin()
2077+
self.assert_(result == 1)
2078+
20682079
def test_idxmax(self):
20692080
# test idxmax
20702081
# _check_stat_op approach can not be used here because of isnull check.
@@ -2086,6 +2097,15 @@ def test_idxmax(self):
20862097
allna = self.series * nan
20872098
self.assert_(isnull(allna.idxmax()))
20882099

2100+
from pandas import date_range
2101+
s = Series(date_range('20130102',periods=6))
2102+
result = s.idxmax()
2103+
self.assert_(result == 5)
2104+
2105+
s[5] = np.nan
2106+
result = s.idxmax()
2107+
self.assert_(result == 4)
2108+
20892109
def test_operators_corner(self):
20902110
series = self.ts
20912111

0 commit comments

Comments
 (0)