From cb923665b022ea55e1210dc41df2469a3deb02d7 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 11 Oct 2018 17:21:17 -0700 Subject: [PATCH 1/5] De-duplicate redundant comparison method code --- pandas/core/arrays/datetimelike.py | 18 ++++-------------- pandas/core/indexes/datetimes.py | 25 ------------------------- pandas/core/indexes/timedeltas.py | 26 -------------------------- 3 files changed, 4 insertions(+), 65 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index e4ace2bfe1509..5b86eba27d1d6 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -41,7 +41,7 @@ from pandas.util._decorators import deprecate_kwarg -def _make_comparison_op(op, cls): +def _make_comparison_op(cls, op): # TODO: share code with indexes.base version? Main difference is that # the block for MultiIndex was removed here. def cmp_method(self, other): @@ -740,6 +740,9 @@ def __isub__(self, other): # -------------------------------------------------------------- # Comparison Methods + # Called by _add_comparison_methods defined in ExtensionOpsMixin + _create_comparison_method = classmethod(_make_comparison_op) + def _evaluate_compare(self, other, op): """ We have been called because a comparison between @@ -773,19 +776,6 @@ def _evaluate_compare(self, other, op): result[mask] = filler return result - # TODO: get this from ExtensionOpsMixin - @classmethod - def _add_comparison_methods(cls): - """ add in comparison methods """ - # DatetimeArray and TimedeltaArray comparison methods will - # call these as their super(...) methods - cls.__eq__ = _make_comparison_op(operator.eq, cls) - cls.__ne__ = _make_comparison_op(operator.ne, cls) - cls.__lt__ = _make_comparison_op(operator.lt, cls) - cls.__gt__ = _make_comparison_op(operator.gt, cls) - cls.__le__ = _make_comparison_op(operator.le, cls) - cls.__ge__ = _make_comparison_op(operator.ge, cls) - DatetimeLikeArrayMixin._add_comparison_methods() diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e40ceadc1a083..a16d0080e2d57 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -82,21 +82,6 @@ def func(self, *args, **kwargs): return func -def _dt_index_cmp(cls, op): - """ - Wrap comparison operations to convert datetime-like to datetime64 - """ - opname = '__{name}__'.format(name=op.__name__) - - def wrapper(self, other): - result = getattr(DatetimeArrayMixin, opname)(self, other) - if is_bool_dtype(result): - return result - return Index(result) - - return compat.set_function_name(wrapper, opname, cls) - - def _new_DatetimeIndex(cls, d): """ This is called upon unpickling, rather than the default which doesn't have arguments and breaks __new__ """ @@ -233,16 +218,6 @@ def _join_i8_wrapper(joinf, **kwargs): _left_indexer_unique = _join_i8_wrapper( libjoin.left_join_indexer_unique_int64, with_indexers=False) - @classmethod - def _add_comparison_methods(cls): - """ add in comparison methods """ - cls.__eq__ = _dt_index_cmp(cls, operator.eq) - cls.__ne__ = _dt_index_cmp(cls, operator.ne) - cls.__lt__ = _dt_index_cmp(cls, operator.lt) - cls.__gt__ = _dt_index_cmp(cls, operator.gt) - cls.__le__ = _dt_index_cmp(cls, operator.le) - cls.__ge__ = _dt_index_cmp(cls, operator.ge) - _engine_type = libindex.DatetimeEngine tz = None diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index ee604f44b98e0..942c1a035c862 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -50,22 +50,6 @@ def f(self): return property(f) -def _td_index_cmp(cls, op): - """ - Wrap comparison operations to convert timedelta-like to timedelta64 - """ - opname = '__{name}__'.format(name=op.__name__) - - def wrapper(self, other): - result = getattr(TimedeltaArrayMixin, opname)(self, other) - if is_bool_dtype(result): - # support of bool dtype indexers - return result - return Index(result) - - return compat.set_function_name(wrapper, opname, cls) - - class TimedeltaIndex(TimedeltaArrayMixin, DatetimeIndexOpsMixin, TimelikeOps, Int64Index): """ @@ -153,16 +137,6 @@ def _join_i8_wrapper(joinf, **kwargs): _datetimelike_methods = ["to_pytimedelta", "total_seconds", "round", "floor", "ceil"] - @classmethod - def _add_comparison_methods(cls): - """ add in comparison methods """ - cls.__eq__ = _td_index_cmp(cls, operator.eq) - cls.__ne__ = _td_index_cmp(cls, operator.ne) - cls.__lt__ = _td_index_cmp(cls, operator.lt) - cls.__gt__ = _td_index_cmp(cls, operator.gt) - cls.__le__ = _td_index_cmp(cls, operator.le) - cls.__ge__ = _td_index_cmp(cls, operator.ge) - _engine_type = libindex.TimedeltaEngine _comparables = ['name', 'freq'] From bbfe21d0800cc6e1596be443af2e375deb23d6a1 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 11 Oct 2018 17:26:46 -0700 Subject: [PATCH 2/5] implement wrap_field_accessor, wrap_array_method for de-duplication --- pandas/core/indexes/datetimelike.py | 57 +++++++++++++++++ pandas/core/indexes/datetimes.py | 97 +++++++++-------------------- 2 files changed, 86 insertions(+), 68 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1ec30ecbb3a3b..8e919ba3599fc 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -744,3 +744,60 @@ def wrap_arithmetic_op(self, other, result): res_name = ops.get_op_result_name(self, other) result.name = res_name return result + + +def wrap_array_method(method, pin_name=False): + """ + Wrap a DatetimeArray/TimedeltaArray/PeriodArray method so that the + returned object is an Index subclass instead of ndarray or ExtensionArray + subclass. + + Parameters + ---------- + method : method of Datetime/Timedelta/Period Array class + pin_name : bool + Whether to set name=self.name on the output Index + + Returns + ------- + method + """ + def index_method(self, *args, **kwargs): + result = method(self, *args, **kwargs) + + # Index.__new__ will choose the appropriate subclass to return + result = Index(result) + if pin_name: + result.name = self.name + return result + + index_method.__name__ = method.__name__ + index_method.__doc__ = method.__doc__ + return index_method + + +def wrap_field_accessor(prop): + """ + Wrap a DatetimeArray/TimedeltaArray/PeriodArray array-returning property + to return an Index subclass instead of ndarray or ExtensionArray subclass. + + Parameters + ---------- + prop : property + + Returns + ------- + new_prop : property + """ + fget = prop.fget + + def f(self): + result = fget(self) + if is_bool_dtype(result): + # return numpy array b/c there is no BoolIndex + return result + return Index(result, name=self.name) + + f.__name__ = fget.__name__ + f.__doc__ = fget.__doc__ + return property(f) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index a16d0080e2d57..da72a032998f3 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -38,7 +38,8 @@ import pandas.compat as compat from pandas.tseries.frequencies import to_offset, Resolution from pandas.core.indexes.datetimelike import ( - DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin) + DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin, + wrap_field_accessor, wrap_array_method) from pandas.tseries.offsets import ( generate_range, CDay, prefix_mapping) @@ -53,34 +54,6 @@ from pandas._libs.tslibs import (timezones, conversion, fields, parsing, ccalendar) -# -------- some conversion wrapper functions - - -def _wrap_field_accessor(name): - fget = getattr(DatetimeArrayMixin, name).fget - - def f(self): - result = fget(self) - if is_bool_dtype(result): - return result - return Index(result, name=self.name) - - f.__name__ = name - f.__doc__ = fget.__doc__ - return property(f) - - -def _wrap_in_index(name): - meth = getattr(DatetimeArrayMixin, name) - - def func(self, *args, **kwargs): - result = meth(self, *args, **kwargs) - return Index(result, name=self.name) - - func.__doc__ = meth.__doc__ - func.__name__ = name - return func - def _new_DatetimeIndex(cls, d): """ This is called upon unpickling, rather than the default which doesn't @@ -1248,38 +1221,38 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): else: raise - year = _wrap_field_accessor('year') - month = _wrap_field_accessor('month') - day = _wrap_field_accessor('day') - hour = _wrap_field_accessor('hour') - minute = _wrap_field_accessor('minute') - second = _wrap_field_accessor('second') - microsecond = _wrap_field_accessor('microsecond') - nanosecond = _wrap_field_accessor('nanosecond') - weekofyear = _wrap_field_accessor('weekofyear') + year = wrap_field_accessor(DatetimeArrayMixin.year) + month = wrap_field_accessor(DatetimeArrayMixin.month) + day = wrap_field_accessor(DatetimeArrayMixin.day) + hour = wrap_field_accessor(DatetimeArrayMixin.hour) + minute = wrap_field_accessor(DatetimeArrayMixin.minute) + second = wrap_field_accessor(DatetimeArrayMixin.second) + microsecond = wrap_field_accessor(DatetimeArrayMixin.microsecond) + nanosecond = wrap_field_accessor(DatetimeArrayMixin.nanosecond) + weekofyear = wrap_field_accessor(DatetimeArrayMixin.weekofyear) week = weekofyear - dayofweek = _wrap_field_accessor('dayofweek') + dayofweek = wrap_field_accessor(DatetimeArrayMixin.dayofweek) weekday = dayofweek - weekday_name = _wrap_field_accessor('weekday_name') + weekday_name = wrap_field_accessor(DatetimeArrayMixin.weekday_name) - dayofyear = _wrap_field_accessor('dayofyear') - quarter = _wrap_field_accessor('quarter') - days_in_month = _wrap_field_accessor('days_in_month') + dayofyear = wrap_field_accessor(DatetimeArrayMixin.dayofyear) + quarter = wrap_field_accessor(DatetimeArrayMixin.quarter) + days_in_month = wrap_field_accessor(DatetimeArrayMixin.days_in_month) daysinmonth = days_in_month - is_month_start = _wrap_field_accessor('is_month_start') - is_month_end = _wrap_field_accessor('is_month_end') - is_quarter_start = _wrap_field_accessor('is_quarter_start') - is_quarter_end = _wrap_field_accessor('is_quarter_end') - is_year_start = _wrap_field_accessor('is_year_start') - is_year_end = _wrap_field_accessor('is_year_end') - is_leap_year = _wrap_field_accessor('is_leap_year') - - @Appender(DatetimeArrayMixin.normalize.__doc__) - def normalize(self): - result = DatetimeArrayMixin.normalize(self) - result.name = self.name - return result + is_month_start = wrap_field_accessor(DatetimeArrayMixin.is_month_start) + is_month_end = wrap_field_accessor(DatetimeArrayMixin.is_month_end) + is_quarter_start = wrap_field_accessor(DatetimeArrayMixin.is_quarter_start) + is_quarter_end = wrap_field_accessor(DatetimeArrayMixin.is_quarter_end) + is_year_start = wrap_field_accessor(DatetimeArrayMixin.is_year_start) + is_year_end = wrap_field_accessor(DatetimeArrayMixin.is_year_end) + is_leap_year = wrap_field_accessor(DatetimeArrayMixin.is_leap_year) + + normalize = wrap_array_method(DatetimeArrayMixin.normalize, True) + to_julian_date = wrap_array_method(DatetimeArrayMixin.to_julian_date, + False) + month_name = wrap_array_method(DatetimeArrayMixin.month_name, True) + day_name = wrap_array_method(DatetimeArrayMixin.day_name, True) @Substitution(klass='DatetimeIndex') @Appender(_shared_docs['searchsorted']) @@ -1467,18 +1440,6 @@ def indexer_between_time(self, start_time, end_time, include_start=True, return mask.nonzero()[0] - def to_julian_date(self): - """ - Convert DatetimeIndex to Float64Index of Julian Dates. - 0 Julian date is noon January 1, 4713 BC. - http://en.wikipedia.org/wiki/Julian_day - """ - result = DatetimeArrayMixin.to_julian_date(self) - return Float64Index(result) - - month_name = _wrap_in_index("month_name") - day_name = _wrap_in_index("day_name") - DatetimeIndex._add_comparison_methods() DatetimeIndex._add_numeric_methods_disabled() From 628903855af68aca7fcfd4ab64d769419c5312df Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 11 Oct 2018 17:32:36 -0700 Subject: [PATCH 3/5] Use wrap_field_accessor and wrap_array_method in PeriodIndex, TimedeltaIndex --- pandas/core/indexes/period.py | 48 +++++++++++-------------------- pandas/core/indexes/timedeltas.py | 27 +++++------------ 2 files changed, 24 insertions(+), 51 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f151389b02463..bfb69b2440286 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -20,7 +20,9 @@ from pandas.tseries.frequencies import get_freq_code as _gfc from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index -from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin +from pandas.core.indexes.datetimelike import ( + DatelikeOps, DatetimeIndexOpsMixin, + wrap_array_method, wrap_field_accessor) from pandas.core.tools.datetimes import parse_time_string from pandas._libs.lib import infer_dtype @@ -43,19 +45,6 @@ _index_doc_kwargs.update( dict(target_klass='PeriodIndex or list of Periods')) - -def _wrap_field_accessor(name): - fget = getattr(PeriodArrayMixin, name).fget - - def f(self): - result = fget(self) - return Index(result, name=self.name) - - f.__name__ = name - f.__doc__ = fget.__doc__ - return property(f) - - # --- Period index sketch @@ -431,22 +420,24 @@ def is_full(self): values = self.asi8 return ((values[1:] - values[:-1]) < 2).all() - year = _wrap_field_accessor('year') - month = _wrap_field_accessor('month') - day = _wrap_field_accessor('day') - hour = _wrap_field_accessor('hour') - minute = _wrap_field_accessor('minute') - second = _wrap_field_accessor('second') - weekofyear = _wrap_field_accessor('week') + year = wrap_field_accessor(PeriodArrayMixin.year) + month = wrap_field_accessor(PeriodArrayMixin.month) + day = wrap_field_accessor(PeriodArrayMixin.day) + hour = wrap_field_accessor(PeriodArrayMixin.hour) + minute = wrap_field_accessor(PeriodArrayMixin.minute) + second = wrap_field_accessor(PeriodArrayMixin.second) + weekofyear = wrap_field_accessor(PeriodArrayMixin.week) week = weekofyear - dayofweek = _wrap_field_accessor('dayofweek') + dayofweek = wrap_field_accessor(PeriodArrayMixin.dayofweek) weekday = dayofweek - dayofyear = day_of_year = _wrap_field_accessor('dayofyear') - quarter = _wrap_field_accessor('quarter') - qyear = _wrap_field_accessor('qyear') - days_in_month = _wrap_field_accessor('days_in_month') + dayofyear = day_of_year = wrap_field_accessor(PeriodArrayMixin.dayofyear) + quarter = wrap_field_accessor(PeriodArrayMixin.quarter) + qyear = wrap_field_accessor(PeriodArrayMixin.qyear) + days_in_month = wrap_field_accessor(PeriodArrayMixin.days_in_month) daysinmonth = days_in_month + to_timestamp = wrap_array_method(PeriodArrayMixin.to_timestamp, True) + @property @Appender(PeriodArrayMixin.start_time.__doc__) def start_time(self): @@ -461,11 +452,6 @@ def _mpl_repr(self): # how to represent ourselves to matplotlib return self.astype(object).values - @Appender(PeriodArrayMixin.to_timestamp.__doc__) - def to_timestamp(self, freq=None, how='start'): - result = PeriodArrayMixin.to_timestamp(self, freq=freq, how=how) - return DatetimeIndex(result, name=self.name) - @property def inferred_type(self): # b/c data is represented as ints make sure we can't have ambiguous diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 942c1a035c862..aaad1831304ad 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -31,24 +31,14 @@ import pandas.core.dtypes.concat as _concat from pandas.util._decorators import Appender, Substitution from pandas.core.indexes.datetimelike import ( - TimelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op) + TimelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op, + wrap_array_method, wrap_field_accessor) from pandas.core.tools.timedeltas import ( to_timedelta, _coerce_scalar_to_timedelta_type) from pandas._libs import (lib, index as libindex, join as libjoin, Timedelta, NaT) -def _wrap_field_accessor(name): - fget = getattr(TimedeltaArrayMixin, name).fget - - def f(self): - result = fget(self) - return Index(result, name=self.name) - - f.__name__ = name - f.__doc__ = fget.__doc__ - return property(f) - class TimedeltaIndex(TimedeltaArrayMixin, DatetimeIndexOpsMixin, TimelikeOps, Int64Index): @@ -243,15 +233,12 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): nat_rep=na_rep, justify='all').get_result() - days = _wrap_field_accessor("days") - seconds = _wrap_field_accessor("seconds") - microseconds = _wrap_field_accessor("microseconds") - nanoseconds = _wrap_field_accessor("nanoseconds") + days = wrap_field_accessor(TimedeltaArrayMixin.days) + seconds = wrap_field_accessor(TimedeltaArrayMixin.seconds) + microseconds = wrap_field_accessor(TimedeltaArrayMixin.microseconds) + nanoseconds = wrap_field_accessor(TimedeltaArrayMixin.nanoseconds) - @Appender(TimedeltaArrayMixin.total_seconds.__doc__) - def total_seconds(self): - result = TimedeltaArrayMixin.total_seconds(self) - return Index(result, name=self.name) + total_seconds = wrap_array_method(TimedeltaArrayMixin.total_seconds, True) @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): From e272e14ee792eca6692ce268064afe582ee3aefa Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 11 Oct 2018 17:36:19 -0700 Subject: [PATCH 4/5] flake8 fixup --- pandas/core/indexes/datetimes.py | 3 +-- pandas/core/indexes/timedeltas.py | 3 --- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index da72a032998f3..253c87719baa8 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -20,7 +20,6 @@ is_integer_dtype, is_datetime64_ns_dtype, is_period_dtype, - is_bool_dtype, is_string_like, is_list_like, is_scalar, @@ -34,7 +33,7 @@ from pandas.core.arrays import datetimelike as dtl from pandas.core.indexes.base import Index, _index_shared_docs -from pandas.core.indexes.numeric import Int64Index, Float64Index +from pandas.core.indexes.numeric import Int64Index import pandas.compat as compat from pandas.tseries.frequencies import to_offset, Resolution from pandas.core.indexes.datetimelike import ( diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index aaad1831304ad..cdd621a9be0a7 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -1,5 +1,4 @@ """ implement the TimedeltaIndex """ -import operator from datetime import datetime import numpy as np @@ -7,7 +6,6 @@ _TD_DTYPE, is_integer, is_float, - is_bool_dtype, is_list_like, is_scalar, is_timedelta64_dtype, @@ -39,7 +37,6 @@ join as libjoin, Timedelta, NaT) - class TimedeltaIndex(TimedeltaArrayMixin, DatetimeIndexOpsMixin, TimelikeOps, Int64Index): """ From 19e0b76010ef5ea8e858d00bef7faa05221515ef Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 11 Oct 2018 19:03:41 -0700 Subject: [PATCH 5/5] Fixup _add_comparison_methods--> _add_comparison_ops --- pandas/core/arrays/datetimelike.py | 2 +- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/timedeltas.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 5b86eba27d1d6..73c0c3c5056bc 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -777,7 +777,7 @@ def _evaluate_compare(self, other, op): return result -DatetimeLikeArrayMixin._add_comparison_methods() +DatetimeLikeArrayMixin._add_comparison_ops() # ------------------------------------------------------------------- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 253c87719baa8..87009d692689a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1440,7 +1440,7 @@ def indexer_between_time(self, start_time, end_time, include_start=True, return mask.nonzero()[0] -DatetimeIndex._add_comparison_methods() +DatetimeIndex._add_comparison_ops() DatetimeIndex._add_numeric_methods_disabled() DatetimeIndex._add_logical_methods_disabled() DatetimeIndex._add_datetimelike_methods() diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index cdd621a9be0a7..56b6dc7051d9f 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -666,7 +666,7 @@ def delete(self, loc): return TimedeltaIndex(new_tds, name=self.name, freq=freq) -TimedeltaIndex._add_comparison_methods() +TimedeltaIndex._add_comparison_ops() TimedeltaIndex._add_numeric_methods() TimedeltaIndex._add_logical_methods_disabled() TimedeltaIndex._add_datetimelike_methods()