Skip to content

PERF: PeriodIndex speed up #14931

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 21, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions asv_bench/benchmarks/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,28 @@ def time_value_counts_pindex(self):
self.i.value_counts()


class period_standard_indexing(object):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally this would be parameterized with Index type. For another day though.

goal_time = 0.2

def setup(self):
self.index = PeriodIndex(start='1985', periods=1000, freq='D')
self.series = Series(range(1000), index=self.index)
self.period = self.index[500]

def time_get_loc(self):
self.index.get_loc(self.period)

def time_shape(self):
self.index.shape

def time_shallow_copy(self):
self.index._shallow_copy()

def time_series_loc(self):
self.series.loc[self.period]

def time_align(self):
pd.DataFrame({'a': self.series, 'b': self.series[:500]})

def time_intersection(self):
self.index[:750].intersection(self.index[250:])
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Performance Improvements
~~~~~~~~~~~~~~~~~~~~~~~~

- Improved performance of ``.replace()`` (:issue:`12745`)
- Improved performance of ``PeriodIndex`` (:issue:`14822`)
- Improved performance of ``Series`` creation with a datetime index and dictionary data (:issue:`14894`)

.. _whatsnew_0192.enhancements.other:
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -814,7 +814,7 @@ def transpose(self, *args, **kwargs):
@property
def shape(self):
""" return a tuple of the shape of the underlying data """
return self.values.shape
return self._values.shape

@property
def ndim(self):
Expand Down Expand Up @@ -842,22 +842,22 @@ def data(self):
@property
def itemsize(self):
""" return the size of the dtype of the item of the underlying data """
return self.values.itemsize
return self._values.itemsize

@property
def nbytes(self):
""" return the number of bytes in the underlying data """
return self.values.nbytes
return self._values.nbytes

@property
def strides(self):
""" return the strides of the underlying data """
return self.values.strides
return self._values.strides

@property
def size(self):
""" return the number of elements in the underlying data """
return self.values.size
return self._values.size

@property
def flags(self):
Expand Down
8 changes: 5 additions & 3 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def dt64arr_to_periodarr(data, freq, tz):

# --- Period index sketch


_DIFFERENT_FREQ_INDEX = period._DIFFERENT_FREQ_INDEX


Expand Down Expand Up @@ -305,7 +306,7 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs):
if (len(values) > 0 and is_float_dtype(values)):
raise TypeError("PeriodIndex can't take floats")
else:
return PeriodIndex(values, name=name, freq=freq, **kwargs)
return cls(values, name=name, freq=freq, **kwargs)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would leave this here; it may be what is causing the ndim tests to fail.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not what's causing the test failure (it could only be different if cls != PeriodIndex).


values = np.array(values, dtype='int64', copy=False)

Expand All @@ -326,6 +327,8 @@ def _shallow_copy(self, values=None, **kwargs):
if kwargs.get('freq') is None:
# freq must be provided
kwargs['freq'] = self.freq
if values is None:
values = self._values
return super(PeriodIndex, self)._shallow_copy(values=values, **kwargs)

def _coerce_scalar_to_index(self, item):
Expand Down Expand Up @@ -356,9 +359,8 @@ def __contains__(self, key):
def asi8(self):
return self._values.view('i8')

@property
@cache_readonly
def _int64index(self):
# do not cache, same as .asi8
return Int64Index(self.asi8, name=self.name, fastpath=True)

@property
Expand Down
8 changes: 0 additions & 8 deletions pandas/tseries/tests/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -2101,14 +2101,6 @@ def test_comp_period(self):
exp = idx.values < idx.values[10]
self.assert_numpy_array_equal(result, exp)

def test_getitem_ndim2(self):
idx = period_range('2007-01', periods=3, freq='M')

result = idx[:, None]
# MPL kludge, internally has incorrect shape
tm.assertIsInstance(result, PeriodIndex)
self.assertEqual(result.shape, (len(idx), ))

def test_getitem_index(self):
idx = period_range('2007-01', periods=10, freq='M', name='x')

Expand Down