Skip to content

Commit 81dba66

Browse files
committed
PERF: Improve dt-like index ops
1 parent f4da4b9 commit 81dba66

File tree

6 files changed

+29
-21
lines changed

6 files changed

+29
-21
lines changed

doc/source/whatsnew/v0.16.2.txt

+2
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ Performance Improvements
4848

4949
- Improved ``Series.resample`` performance with dtype=datetime64[ns] (:issue:`7754`)
5050
- Modest improvement in datetime writing speed in to_csv (:issue:`10271`)
51+
- Improved performance of operations on large ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex`` objects that include ``NaT`` values (:issue:`10277`)
52+
5153

5254
.. _whatsnew_0162.bug_fixes:
5355

pandas/tseries/base.py

+13-12
Original file line numberDiff line numberDiff line change
@@ -130,10 +130,15 @@ def get_duplicates(self):
130130
values = Index.get_duplicates(self)
131131
return self._simple_new(values)
132132

133+
@cache_readonly
134+
def _isnan(self):
135+
""" return if each value is nan"""
136+
return (self.asi8 == tslib.iNaT)
137+
133138
@cache_readonly
134139
def hasnans(self):
135140
""" return if I have any nans; enables various perf speedups """
136-
return (self.asi8 == tslib.iNaT).any()
141+
return self._isnan.any()
137142

138143
@property
139144
def asobject(self):
@@ -157,12 +162,11 @@ def _maybe_mask_results(self, result, fill_value=None, convert=None):
157162
"""
158163

159164
if self.hasnans:
160-
mask = self.asi8 == tslib.iNaT
161165
if convert:
162166
result = result.astype(convert)
163167
if fill_value is None:
164168
fill_value = np.nan
165-
result[mask] = fill_value
169+
result[self._isnan] = fill_value
166170
return result
167171

168172
def tolist(self):
@@ -188,8 +192,7 @@ def min(self, axis=None):
188192
return self._box_func(i8[0])
189193

190194
if self.hasnans:
191-
mask = i8 == tslib.iNaT
192-
min_stamp = self[~mask].asi8.min()
195+
min_stamp = self[~self._isnan].asi8.min()
193196
else:
194197
min_stamp = i8.min()
195198
return self._box_func(min_stamp)
@@ -207,7 +210,7 @@ def argmin(self, axis=None):
207210

208211
i8 = self.asi8
209212
if self.hasnans:
210-
mask = i8 == tslib.iNaT
213+
mask = self._isnan
211214
if mask.all():
212215
return -1
213216
i8 = i8.copy()
@@ -231,8 +234,7 @@ def max(self, axis=None):
231234
return self._box_func(i8[-1])
232235

233236
if self.hasnans:
234-
mask = i8 == tslib.iNaT
235-
max_stamp = self[~mask].asi8.max()
237+
max_stamp = self[~self._isnan].asi8.max()
236238
else:
237239
max_stamp = i8.max()
238240
return self._box_func(max_stamp)
@@ -250,7 +252,7 @@ def argmax(self, axis=None):
250252

251253
i8 = self.asi8
252254
if self.hasnans:
253-
mask = i8 == tslib.iNaT
255+
mask = self._isnan
254256
if mask.all():
255257
return -1
256258
i8 = i8.copy()
@@ -381,9 +383,8 @@ def _add_delta_td(self, other):
381383
# return the i8 result view
382384

383385
inc = tslib._delta_to_nanoseconds(other)
384-
mask = self.asi8 == tslib.iNaT
385386
new_values = (self.asi8 + inc).view(self.dtype)
386-
new_values[mask] = tslib.iNaT
387+
new_values[self._isnan] = tslib.iNaT
387388
return new_values.view(self.dtype)
388389

389390
def _add_delta_tdi(self, other):
@@ -396,7 +397,7 @@ def _add_delta_tdi(self, other):
396397

397398
self_i8 = self.asi8
398399
other_i8 = other.asi8
399-
mask = (self_i8 == tslib.iNaT) | (other_i8 == tslib.iNaT)
400+
mask = (self._isnan) | (other._isnan)
400401
new_values = self_i8 + other_i8
401402
new_values[mask] = tslib.iNaT
402403
return new_values.view(self.dtype)

pandas/tseries/index.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,8 @@ def wrapper(self, other):
8585
if o_mask.any():
8686
result[o_mask] = nat_result
8787

88-
mask = self.asi8 == tslib.iNaT
89-
if mask.any():
90-
result[mask] = nat_result
88+
if self.hasnans:
89+
result[self._isnan] = nat_result
9190

9291
# support of bool dtype indexers
9392
if com.is_bool_dtype(result):

pandas/tseries/period.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -506,9 +506,8 @@ def shift(self, n):
506506
-------
507507
shifted : PeriodIndex
508508
"""
509-
mask = self.values == tslib.iNaT
510509
values = self.values + n
511-
values[mask] = tslib.iNaT
510+
values[self._isnan] = tslib.iNaT
512511
return PeriodIndex(data=values, name=self.name, freq=self.freq)
513512

514513
@property

pandas/tseries/tdi.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,8 @@ def wrapper(self, other):
6161
if o_mask.any():
6262
result[o_mask] = nat_result
6363

64-
mask = self.asi8 == tslib.iNaT
65-
if mask.any():
66-
result[mask] = nat_result
64+
if self.hasnans:
65+
result[self._isnan] = nat_result
6766

6867
# support of bool dtype indexers
6968
if com.is_bool_dtype(result):
@@ -334,7 +333,7 @@ def _get_field(self, m):
334333
hasnans = self.hasnans
335334
if hasnans:
336335
result = np.empty(len(self), dtype='float64')
337-
mask = values == tslib.iNaT
336+
mask = self._isnan
338337
imask = ~mask
339338
result.flat[imask] = np.array([ getattr(Timedelta(val),m) for val in values[imask] ])
340339
result[mask] = np.nan

pandas/tseries/tests/test_base.py

+8
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ def test_minmax(self):
9797
for idx in [idx1, idx2]:
9898
self.assertEqual(idx.min(), pd.Timestamp('2011-01-01', tz=tz))
9999
self.assertEqual(idx.max(), pd.Timestamp('2011-01-03', tz=tz))
100+
self.assertEqual(idx.argmin(), 0)
101+
self.assertEqual(idx.argmax(), 2)
100102

101103
for op in ['min', 'max']:
102104
# Return NaT
@@ -348,6 +350,8 @@ def test_minmax(self):
348350
for idx in [idx1, idx2]:
349351
self.assertEqual(idx.min(), Timedelta('1 days')),
350352
self.assertEqual(idx.max(), Timedelta('3 days')),
353+
self.assertEqual(idx.argmin(), 0)
354+
self.assertEqual(idx.argmax(), 2)
351355

352356
for op in ['min', 'max']:
353357
# Return NaT
@@ -801,6 +805,10 @@ def test_minmax(self):
801805
for idx in [idx1, idx2]:
802806
self.assertEqual(idx.min(), pd.Period('2011-01-01', freq='D'))
803807
self.assertEqual(idx.max(), pd.Period('2011-01-03', freq='D'))
808+
self.assertEqual(idx1.argmin(), 1)
809+
self.assertEqual(idx2.argmin(), 0)
810+
self.assertEqual(idx1.argmax(), 3)
811+
self.assertEqual(idx2.argmax(), 2)
804812

805813
for op in ['min', 'max']:
806814
# Return NaT

0 commit comments

Comments
 (0)