Skip to content

PERF/CLN: Improve datetime-like index ops perf #10277

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 10, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ Performance Improvements

- Improved performance of ``andrews_curves`` (:issue:`11534`)

- Improved performance of operations on large ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex`` objects, including those containing ``NaT`` (:issue:`10277`)




Expand Down
22 changes: 10 additions & 12 deletions pandas/tseries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,12 +281,11 @@ def _maybe_mask_results(self, result, fill_value=None, convert=None):
"""

if self.hasnans:
mask = self.asi8 == tslib.iNaT
if convert:
result = result.astype(convert)
if fill_value is None:
fill_value = np.nan
result[mask] = fill_value
result[self._isnan] = fill_value
return result

def tolist(self):
Expand All @@ -312,8 +311,7 @@ def min(self, axis=None):
return self._box_func(i8[0])

if self.hasnans:
mask = i8 == tslib.iNaT
min_stamp = i8[~mask].min()
min_stamp = self[~self._isnan].asi8.min()
else:
min_stamp = i8.min()
return self._box_func(min_stamp)
Expand All @@ -331,7 +329,7 @@ def argmin(self, axis=None):

i8 = self.asi8
if self.hasnans:
mask = i8 == tslib.iNaT
mask = self._isnan
if mask.all():
return -1
i8 = i8.copy()
Expand All @@ -355,8 +353,7 @@ def max(self, axis=None):
return self._box_func(i8[-1])

if self.hasnans:
mask = i8 == tslib.iNaT
max_stamp = i8[~mask].max()
max_stamp = self[~self._isnan].asi8.max()
else:
max_stamp = i8.max()
return self._box_func(max_stamp)
Expand All @@ -374,7 +371,7 @@ def argmax(self, axis=None):

i8 = self.asi8
if self.hasnans:
mask = i8 == tslib.iNaT
mask = self._isnan
if mask.all():
return -1
i8 = i8.copy()
Expand Down Expand Up @@ -498,9 +495,9 @@ def _add_delta_td(self, other):
# return the i8 result view

inc = tslib._delta_to_nanoseconds(other)
mask = self.asi8 == tslib.iNaT
new_values = (self.asi8 + inc).view('i8')
new_values[mask] = tslib.iNaT
if self.hasnans:
new_values[self._isnan] = tslib.iNaT
return new_values.view('i8')

def _add_delta_tdi(self, other):
Expand All @@ -513,9 +510,10 @@ def _add_delta_tdi(self, other):

self_i8 = self.asi8
other_i8 = other.asi8
mask = (self_i8 == tslib.iNaT) | (other_i8 == tslib.iNaT)
new_values = self_i8 + other_i8
new_values[mask] = tslib.iNaT
if self.hasnans or other.hasnans:
mask = (self._isnan) | (other._isnan)
new_values[mask] = tslib.iNaT
return new_values.view(self.dtype)

def isin(self, values):
Expand Down
5 changes: 2 additions & 3 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,8 @@ def wrapper(self, other):
if o_mask.any():
result[o_mask] = nat_result

mask = self.asi8 == tslib.iNaT
if mask.any():
result[mask] = nat_result
if self.hasnans:
result[self._isnan] = nat_result

# support of bool dtype indexers
if com.is_bool_dtype(result):
Expand Down
4 changes: 2 additions & 2 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,9 +589,9 @@ def shift(self, n):
-------
shifted : PeriodIndex
"""
mask = self.values == tslib.iNaT
values = self.values + n * self.freq.n
values[mask] = tslib.iNaT
if self.hasnans:
values[self._isnan] = tslib.iNaT
return PeriodIndex(data=values, name=self.name, freq=self.freq)

@cache_readonly
Expand Down
7 changes: 3 additions & 4 deletions pandas/tseries/tdi.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,8 @@ def wrapper(self, other):
if o_mask.any():
result[o_mask] = nat_result

mask = self.asi8 == tslib.iNaT
if mask.any():
result[mask] = nat_result
if self.hasnans:
result[self._isnan] = nat_result

# support of bool dtype indexers
if com.is_bool_dtype(result):
Expand Down Expand Up @@ -334,7 +333,7 @@ def _get_field(self, m):
hasnans = self.hasnans
if hasnans:
result = np.empty(len(self), dtype='float64')
mask = values == tslib.iNaT
mask = self._isnan
imask = ~mask
result.flat[imask] = np.array([ getattr(Timedelta(val),m) for val in values[imask] ])
result[mask] = np.nan
Expand Down
8 changes: 8 additions & 0 deletions pandas/tseries/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ def test_minmax(self):
for idx in [idx1, idx2]:
self.assertEqual(idx.min(), pd.Timestamp('2011-01-01', tz=tz))
self.assertEqual(idx.max(), pd.Timestamp('2011-01-03', tz=tz))
self.assertEqual(idx.argmin(), 0)
self.assertEqual(idx.argmax(), 2)

for op in ['min', 'max']:
# Return NaT
Expand Down Expand Up @@ -579,6 +581,8 @@ def test_minmax(self):
for idx in [idx1, idx2]:
self.assertEqual(idx.min(), Timedelta('1 days')),
self.assertEqual(idx.max(), Timedelta('3 days')),
self.assertEqual(idx.argmin(), 0)
self.assertEqual(idx.argmax(), 2)

for op in ['min', 'max']:
# Return NaT
Expand Down Expand Up @@ -1209,6 +1213,10 @@ def test_minmax(self):
for idx in [idx1, idx2]:
self.assertEqual(idx.min(), pd.Period('2011-01-01', freq='D'))
self.assertEqual(idx.max(), pd.Period('2011-01-03', freq='D'))
self.assertEqual(idx1.argmin(), 1)
self.assertEqual(idx2.argmin(), 0)
self.assertEqual(idx1.argmax(), 3)
self.assertEqual(idx2.argmax(), 2)

for op in ['min', 'max']:
# Return NaT
Expand Down