diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index ff5a201057bcd..f9837191a7bae 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -49,3 +49,28 @@ def time_value_counts_pindex(self): self.i.value_counts() +class period_standard_indexing(object): + goal_time = 0.2 + + def setup(self): + self.index = PeriodIndex(start='1985', periods=1000, freq='D') + self.series = Series(range(1000), index=self.index) + self.period = self.index[500] + + def time_get_loc(self): + self.index.get_loc(self.period) + + def time_shape(self): + self.index.shape + + def time_shallow_copy(self): + self.index._shallow_copy() + + def time_series_loc(self): + self.series.loc[self.period] + + def time_align(self): + pd.DataFrame({'a': self.series, 'b': self.series[:500]}) + + def time_intersection(self): + self.index[:750].intersection(self.index[250:]) diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index c94e08ec41760..72dbca223ef71 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -22,6 +22,7 @@ Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Improved performance of ``.replace()`` (:issue:`12745`) +- Improved performance of ``PeriodIndex`` (:issue:`14822`) - Improved performance ``Series`` creation with a datetime index and dictionary data (:issue:`14894`) .. _whatsnew_0192.enhancements.other: diff --git a/pandas/core/base.py b/pandas/core/base.py index a0365ce484a5a..49e43a60403ca 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -814,7 +814,7 @@ def transpose(self, *args, **kwargs): @property def shape(self): """ return a tuple of the shape of the underlying data """ - return self.values.shape + return self._values.shape @property def ndim(self): @@ -842,22 +842,22 @@ def data(self): @property def itemsize(self): """ return the size of the dtype of the item of the underlying data """ - return self.values.itemsize + return self._values.itemsize @property def nbytes(self): """ return the number of bytes in the underlying data """ - return self.values.nbytes + return self._values.nbytes @property def strides(self): """ return the strides of the underlying data """ - return self.values.strides + return self._values.strides @property def size(self): """ return the number of elements in the underlying data """ - return self.values.size + return self._values.size @property def flags(self): diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 4bab3bc14461e..8c75195b25ef5 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -65,6 +65,7 @@ def dt64arr_to_periodarr(data, freq, tz): # --- Period index sketch + _DIFFERENT_FREQ_INDEX = period._DIFFERENT_FREQ_INDEX @@ -305,7 +306,7 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs): if (len(values) > 0 and is_float_dtype(values)): raise TypeError("PeriodIndex can't take floats") else: - return PeriodIndex(values, name=name, freq=freq, **kwargs) + return cls(values, name=name, freq=freq, **kwargs) values = np.array(values, dtype='int64', copy=False) @@ -326,6 +327,8 @@ def _shallow_copy(self, values=None, **kwargs): if kwargs.get('freq') is None: # freq must be provided kwargs['freq'] = self.freq + if values is None: + values = self._values return super(PeriodIndex, self)._shallow_copy(values=values, **kwargs) def _coerce_scalar_to_index(self, item): @@ -356,9 +359,8 @@ def __contains__(self, key): def asi8(self): return self._values.view('i8') - @property + @cache_readonly def _int64index(self): - # do not cache, same as .asi8 return Int64Index(self.asi8, name=self.name, fastpath=True) @property diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index dae1554c0930e..ad4f669fceb42 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -2101,14 +2101,6 @@ def test_comp_period(self): exp = idx.values < idx.values[10] self.assert_numpy_array_equal(result, exp) - def test_getitem_ndim2(self): - idx = period_range('2007-01', periods=3, freq='M') - - result = idx[:, None] - # MPL kludge, internally has incorrect shape - tm.assertIsInstance(result, PeriodIndex) - self.assertEqual(result.shape, (len(idx), )) - def test_getitem_index(self): idx = period_range('2007-01', periods=10, freq='M', name='x')