Skip to content

Commit eec0bc6

Browse files
committed
Merge pull request #10277 from sinhrks/dti_isnan
PERF/CLN: Improve datetime-like index ops perf
2 parents b80b5c7 + 4b1aa75 commit eec0bc6

File tree

6 files changed

+27
-21
lines changed

6 files changed

+27
-21
lines changed

doc/source/whatsnew/v0.18.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,8 @@ Performance Improvements
139139

140140
- Improved performance of ``andrews_curves`` (:issue:`11534`)
141141

142+
- Improved performance of operations on large ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex`` objects, including those containing ``NaT`` (:issue:`10277`)
143+
142144

143145

144146

pandas/tseries/base.py

+10-12
Original file line numberDiff line numberDiff line change
@@ -281,12 +281,11 @@ def _maybe_mask_results(self, result, fill_value=None, convert=None):
281281
"""
282282

283283
if self.hasnans:
284-
mask = self.asi8 == tslib.iNaT
285284
if convert:
286285
result = result.astype(convert)
287286
if fill_value is None:
288287
fill_value = np.nan
289-
result[mask] = fill_value
288+
result[self._isnan] = fill_value
290289
return result
291290

292291
def tolist(self):
@@ -312,8 +311,7 @@ def min(self, axis=None):
312311
return self._box_func(i8[0])
313312

314313
if self.hasnans:
315-
mask = i8 == tslib.iNaT
316-
min_stamp = i8[~mask].min()
314+
min_stamp = self[~self._isnan].asi8.min()
317315
else:
318316
min_stamp = i8.min()
319317
return self._box_func(min_stamp)
@@ -331,7 +329,7 @@ def argmin(self, axis=None):
331329

332330
i8 = self.asi8
333331
if self.hasnans:
334-
mask = i8 == tslib.iNaT
332+
mask = self._isnan
335333
if mask.all():
336334
return -1
337335
i8 = i8.copy()
@@ -355,8 +353,7 @@ def max(self, axis=None):
355353
return self._box_func(i8[-1])
356354

357355
if self.hasnans:
358-
mask = i8 == tslib.iNaT
359-
max_stamp = i8[~mask].max()
356+
max_stamp = self[~self._isnan].asi8.max()
360357
else:
361358
max_stamp = i8.max()
362359
return self._box_func(max_stamp)
@@ -374,7 +371,7 @@ def argmax(self, axis=None):
374371

375372
i8 = self.asi8
376373
if self.hasnans:
377-
mask = i8 == tslib.iNaT
374+
mask = self._isnan
378375
if mask.all():
379376
return -1
380377
i8 = i8.copy()
@@ -498,9 +495,9 @@ def _add_delta_td(self, other):
498495
# return the i8 result view
499496

500497
inc = tslib._delta_to_nanoseconds(other)
501-
mask = self.asi8 == tslib.iNaT
502498
new_values = (self.asi8 + inc).view('i8')
503-
new_values[mask] = tslib.iNaT
499+
if self.hasnans:
500+
new_values[self._isnan] = tslib.iNaT
504501
return new_values.view('i8')
505502

506503
def _add_delta_tdi(self, other):
@@ -513,9 +510,10 @@ def _add_delta_tdi(self, other):
513510

514511
self_i8 = self.asi8
515512
other_i8 = other.asi8
516-
mask = (self_i8 == tslib.iNaT) | (other_i8 == tslib.iNaT)
517513
new_values = self_i8 + other_i8
518-
new_values[mask] = tslib.iNaT
514+
if self.hasnans or other.hasnans:
515+
mask = (self._isnan) | (other._isnan)
516+
new_values[mask] = tslib.iNaT
519517
return new_values.view(self.dtype)
520518

521519
def isin(self, values):

pandas/tseries/index.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,8 @@ def wrapper(self, other):
9393
if o_mask.any():
9494
result[o_mask] = nat_result
9595

96-
mask = self.asi8 == tslib.iNaT
97-
if mask.any():
98-
result[mask] = nat_result
96+
if self.hasnans:
97+
result[self._isnan] = nat_result
9998

10099
# support of bool dtype indexers
101100
if com.is_bool_dtype(result):

pandas/tseries/period.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -589,9 +589,9 @@ def shift(self, n):
589589
-------
590590
shifted : PeriodIndex
591591
"""
592-
mask = self.values == tslib.iNaT
593592
values = self.values + n * self.freq.n
594-
values[mask] = tslib.iNaT
593+
if self.hasnans:
594+
values[self._isnan] = tslib.iNaT
595595
return PeriodIndex(data=values, name=self.name, freq=self.freq)
596596

597597
@cache_readonly

pandas/tseries/tdi.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,8 @@ def wrapper(self, other):
5151
if o_mask.any():
5252
result[o_mask] = nat_result
5353

54-
mask = self.asi8 == tslib.iNaT
55-
if mask.any():
56-
result[mask] = nat_result
54+
if self.hasnans:
55+
result[self._isnan] = nat_result
5756

5857
# support of bool dtype indexers
5958
if com.is_bool_dtype(result):
@@ -334,7 +333,7 @@ def _get_field(self, m):
334333
hasnans = self.hasnans
335334
if hasnans:
336335
result = np.empty(len(self), dtype='float64')
337-
mask = values == tslib.iNaT
336+
mask = self._isnan
338337
imask = ~mask
339338
result.flat[imask] = np.array([ getattr(Timedelta(val),m) for val in values[imask] ])
340339
result[mask] = np.nan

pandas/tseries/tests/test_base.py

+8
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,8 @@ def test_minmax(self):
124124
for idx in [idx1, idx2]:
125125
self.assertEqual(idx.min(), pd.Timestamp('2011-01-01', tz=tz))
126126
self.assertEqual(idx.max(), pd.Timestamp('2011-01-03', tz=tz))
127+
self.assertEqual(idx.argmin(), 0)
128+
self.assertEqual(idx.argmax(), 2)
127129

128130
for op in ['min', 'max']:
129131
# Return NaT
@@ -579,6 +581,8 @@ def test_minmax(self):
579581
for idx in [idx1, idx2]:
580582
self.assertEqual(idx.min(), Timedelta('1 days')),
581583
self.assertEqual(idx.max(), Timedelta('3 days')),
584+
self.assertEqual(idx.argmin(), 0)
585+
self.assertEqual(idx.argmax(), 2)
582586

583587
for op in ['min', 'max']:
584588
# Return NaT
@@ -1209,6 +1213,10 @@ def test_minmax(self):
12091213
for idx in [idx1, idx2]:
12101214
self.assertEqual(idx.min(), pd.Period('2011-01-01', freq='D'))
12111215
self.assertEqual(idx.max(), pd.Period('2011-01-03', freq='D'))
1216+
self.assertEqual(idx1.argmin(), 1)
1217+
self.assertEqual(idx2.argmin(), 0)
1218+
self.assertEqual(idx1.argmax(), 3)
1219+
self.assertEqual(idx2.argmax(), 2)
12121220

12131221
for op in ['min', 'max']:
12141222
# Return NaT

0 commit comments

Comments
 (0)