Skip to content

More speedups for Period comparisons #21606

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ Performance Improvements

- Improved performance of :func:`Series.describe` in case of numeric dtypes (:issue:`21274`)
- Improved performance of :func:`pandas.core.groupby.GroupBy.rank` when dealing with tied rankings (:issue:`21237`)
- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`)
- Improved performance of :func:`DataFrame.set_index` with columns consisting of :class:`Period` objects (:issue:`21582`, :issue:`21606`)
-

.. _whatsnew_0240.docs:
Expand Down
44 changes: 44 additions & 0 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,15 @@ for _d in DAYS:
# ---------------------------------------------------------------------
# Misc Helpers

cdef to_offset(object obj):
    """
    Delegate to pandas.tseries.frequencies.to_offset and return its result.

    The import happens at call time, inside this single helper, so that
    runtime imports stay centralized in one place.
    """
    from pandas.tseries import frequencies
    return frequencies.to_offset(obj)


def as_datetime(obj):
f = getattr(obj, 'to_pydatetime', None)
if f is not None:
Expand Down Expand Up @@ -313,6 +322,41 @@ class _BaseOffset(object):
def __setattr__(self, name, value):
    """Refuse every attribute assignment by raising AttributeError."""
    # ``name`` and ``value`` are never inspected: all assignments fail.
    message = "DateOffset objects are immutable."
    raise AttributeError(message)

def __eq__(self, other):
    """
    Return whether ``other`` has the same ``_params`` as this object.

    A string ``other`` is first passed through ``to_offset``. Anything
    that cannot be converted, or that does not expose ``_params``,
    compares unequal instead of raising.
    """
    if is_string_object(other):
        try:
            other = to_offset(other)
        except ValueError:
            # NOTE(review): assumes to_offset reports an unparseable
            # frequency string with ValueError -- confirm.  Such a string
            # cannot be equal to any offset.
            return False

    try:
        return self._params == other._params
    except AttributeError:
        # other is not a DateOffset object
        return False

def __ne__(self, other):
    """Return the logical negation of ``self == other``."""
    are_equal = self == other
    return not are_equal

def __hash__(self):
    """Return the hash of this object's ``_params`` value."""
    params = self._params
    return hash(params)

@property
def _params(self):
    """
    Build the tuple used to compare two DateOffset objects for equality.

    The first element is ``str(self.__class__)``; the remaining elements
    are the sorted, de-duplicated ``(name, value)`` pairs taken from the
    instance ``__dict__``, leaving out 'kwds', 'name', 'calendar', any
    name starting with an underscore, and a falsy 'holidays' entry.
    """
    # NB: non-cython subclasses override property with cache_readonly
    state = dict(self.__dict__)
    # A missing 'holidays' defaults to True here, so only a present and
    # falsy value is dropped.
    if not state.get('holidays', True):
        del state['holidays']

    skipped = ('kwds', 'name', 'calendar')
    pairs = []
    for key, value in state.items():
        if key in skipped or key[0] == '_':
            continue
        pairs.append((key, value))

    return (str(self.__class__),) + tuple(sorted(set(pairs)))

@property
def kwds(self):
# for backwards-compatibility
Expand Down
31 changes: 1 addition & 30 deletions pandas/tseries/offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ def __add__(date):

Since 0 is a bit weird, we suggest avoiding its use.
"""
_params = cache_readonly(BaseOffset._params.fget)
_use_relativedelta = False
_adjust_dst = False
_attributes = frozenset(['n', 'normalize'] +
Expand Down Expand Up @@ -288,18 +289,6 @@ def isAnchored(self):
# if there were a canonical docstring for what isAnchored means.
return (self.n == 1)

@cache_readonly
def _params(self):
    """
    Build the tuple used to compare two offsets for equality.

    The first element is ``str(self.__class__)``; the rest are the sorted,
    de-duplicated ``(name, value)`` pairs from the instance ``__dict__``,
    leaving out 'kwds', 'name', 'calendar', any name starting with an
    underscore, and a falsy 'holidays' entry.
    """
    state = dict(self.__dict__)
    # A missing 'holidays' defaults to True here, so only a present and
    # falsy value is dropped.
    if not state.get('holidays', True):
        del state['holidays']

    skipped = ('kwds', 'name', 'calendar')
    pairs = []
    for key, value in state.items():
        if key in skipped or key[0] == '_':
            continue
        pairs.append((key, value))

    return (str(self.__class__),) + tuple(sorted(set(pairs)))

# TODO: Combine this with BusinessMixin version by defining a whitelisted
# set of attributes on each object rather than the existing behavior of
# iterating over internal ``__dict__``
Expand All @@ -322,24 +311,6 @@ def _repr_attrs(self):
def name(self):
return self.rule_code

def __eq__(self, other):
    """
    Return whether ``other`` is a DateOffset with the same ``_params``.

    A string ``other`` is first converted with
    pandas.tseries.frequencies.to_offset (imported at call time).
    """
    if isinstance(other, compat.string_types):
        from pandas.tseries import frequencies

        other = frequencies.to_offset(other)

    # Non-DateOffset objects short-circuit to False before _params is read.
    return isinstance(other, DateOffset) and self._params == other._params

def __ne__(self, other):
    """Return True exactly when ``self == other`` is falsy."""
    same = self == other
    return not same

def __hash__(self):
    """Return ``hash(self._params)``."""
    key = self._params
    return hash(key)

def __add__(self, other):
if isinstance(other, (ABCDatetimeIndex, ABCSeries)):
return other + self
Expand Down