diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index c1b7ff82f4c76..6db2cb409df85 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -139,6 +139,8 @@ Performance Improvements - Improved performance of ``andrews_curves`` (:issue:`11534`) +- Improved performance of operations on large ``DatetimeIndex``, ``PeriodIndex`` and ``TimedeltaIndex`` objects, including those containing ``NaT`` (:issue:`10277`) + diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index 4f0780ef2d660..ed9bf8d3862ce 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -281,12 +281,11 @@ def _maybe_mask_results(self, result, fill_value=None, convert=None): """ if self.hasnans: - mask = self.asi8 == tslib.iNaT if convert: result = result.astype(convert) if fill_value is None: fill_value = np.nan - result[mask] = fill_value + result[self._isnan] = fill_value return result def tolist(self): @@ -312,8 +311,7 @@ def min(self, axis=None): return self._box_func(i8[0]) if self.hasnans: - mask = i8 == tslib.iNaT - min_stamp = i8[~mask].min() + min_stamp = self[~self._isnan].asi8.min() else: min_stamp = i8.min() return self._box_func(min_stamp) @@ -331,7 +329,7 @@ def argmin(self, axis=None): i8 = self.asi8 if self.hasnans: - mask = i8 == tslib.iNaT + mask = self._isnan if mask.all(): return -1 i8 = i8.copy() @@ -355,8 +353,7 @@ def max(self, axis=None): return self._box_func(i8[-1]) if self.hasnans: - mask = i8 == tslib.iNaT - max_stamp = i8[~mask].max() + max_stamp = self[~self._isnan].asi8.max() else: max_stamp = i8.max() return self._box_func(max_stamp) @@ -374,7 +371,7 @@ def argmax(self, axis=None): i8 = self.asi8 if self.hasnans: - mask = i8 == tslib.iNaT + mask = self._isnan if mask.all(): return -1 i8 = i8.copy() @@ -498,9 +495,9 @@ def _add_delta_td(self, other): # return the i8 result view inc = tslib._delta_to_nanoseconds(other) - mask = self.asi8 == tslib.iNaT new_values = (self.asi8 + inc).view('i8') - new_values[mask] = tslib.iNaT + if
self.hasnans: + new_values[self._isnan] = tslib.iNaT return new_values.view('i8') def _add_delta_tdi(self, other): @@ -513,9 +510,10 @@ def _add_delta_tdi(self, other): self_i8 = self.asi8 other_i8 = other.asi8 - mask = (self_i8 == tslib.iNaT) | (other_i8 == tslib.iNaT) new_values = self_i8 + other_i8 - new_values[mask] = tslib.iNaT + if self.hasnans or other.hasnans: + mask = (self._isnan) | (other._isnan) + new_values[mask] = tslib.iNaT return new_values.view(self.dtype) def isin(self, values): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 88816fd0c0dad..14acfb57afe56 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -93,9 +93,8 @@ def wrapper(self, other): if o_mask.any(): result[o_mask] = nat_result - mask = self.asi8 == tslib.iNaT - if mask.any(): - result[mask] = nat_result + if self.hasnans: + result[self._isnan] = nat_result # support of bool dtype indexers if com.is_bool_dtype(result): diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 3f4bba0344ca0..534804900c5e6 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -589,9 +589,9 @@ def shift(self, n): ------- shifted : PeriodIndex """ - mask = self.values == tslib.iNaT values = self.values + n * self.freq.n - values[mask] = tslib.iNaT + if self.hasnans: + values[self._isnan] = tslib.iNaT return PeriodIndex(data=values, name=self.name, freq=self.freq) @cache_readonly diff --git a/pandas/tseries/tdi.py b/pandas/tseries/tdi.py index 3f884ee32dd76..ea61e4f247e58 100644 --- a/pandas/tseries/tdi.py +++ b/pandas/tseries/tdi.py @@ -51,9 +51,8 @@ def wrapper(self, other): if o_mask.any(): result[o_mask] = nat_result - mask = self.asi8 == tslib.iNaT - if mask.any(): - result[mask] = nat_result + if self.hasnans: + result[self._isnan] = nat_result # support of bool dtype indexers if com.is_bool_dtype(result): @@ -334,7 +333,7 @@ def _get_field(self, m): hasnans = self.hasnans if hasnans: result = np.empty(len(self), dtype='float64') - 
mask = values == tslib.iNaT + mask = self._isnan imask = ~mask result.flat[imask] = np.array([ getattr(Timedelta(val),m) for val in values[imask] ]) result[mask] = np.nan diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 4d353eccba972..bf37bd4afe1da 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -124,6 +124,8 @@ def test_minmax(self): for idx in [idx1, idx2]: self.assertEqual(idx.min(), pd.Timestamp('2011-01-01', tz=tz)) self.assertEqual(idx.max(), pd.Timestamp('2011-01-03', tz=tz)) + self.assertEqual(idx.argmin(), 0) + self.assertEqual(idx.argmax(), 2) for op in ['min', 'max']: # Return NaT @@ -579,6 +581,8 @@ def test_minmax(self): for idx in [idx1, idx2]: self.assertEqual(idx.min(), Timedelta('1 days')), self.assertEqual(idx.max(), Timedelta('3 days')), + self.assertEqual(idx.argmin(), 0) + self.assertEqual(idx.argmax(), 2) for op in ['min', 'max']: # Return NaT @@ -1209,6 +1213,10 @@ def test_minmax(self): for idx in [idx1, idx2]: self.assertEqual(idx.min(), pd.Period('2011-01-01', freq='D')) self.assertEqual(idx.max(), pd.Period('2011-01-03', freq='D')) + self.assertEqual(idx1.argmin(), 1) + self.assertEqual(idx2.argmin(), 0) + self.assertEqual(idx1.argmax(), 3) + self.assertEqual(idx2.argmax(), 2) for op in ['min', 'max']: # Return NaT