diff --git a/pandas/_libs/src/numpy_helper.h b/pandas/_libs/src/numpy_helper.h index 5cfa51dc8a0be..3573a561945d2 100644 --- a/pandas/_libs/src/numpy_helper.h +++ b/pandas/_libs/src/numpy_helper.h @@ -50,7 +50,7 @@ PANDAS_INLINE PyObject* char_to_string(const char* data) { void set_array_not_contiguous(PyArrayObject* ao) { - ao->flags &= ~(NPY_C_CONTIGUOUS | NPY_F_CONTIGUOUS); + ao->flags &= ~(NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS); } #endif // PANDAS__LIBS_SRC_NUMPY_HELPER_H_ diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 0ec5d25beeeb9..e4350ee8ded53 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1859,21 +1859,40 @@ cdef int64_t _ordinal_from_fields(year, month, quarter, day, hour, minute, second, freq): base, mult = get_freq_code(freq) if quarter is not None: - year, month = _quarter_to_myear(year, quarter, freq) + year, month = quarter_to_myear(year, quarter, freq) return period_ordinal(year, month, day, hour, minute, second, 0, 0, base) -def _quarter_to_myear(year, quarter, freq): - if quarter is not None: - if quarter <= 0 or quarter > 4: - raise ValueError('Quarter must be 1 <= q <= 4') +def quarter_to_myear(int year, int quarter, freq): + """ + A quarterly frequency defines a "year" which may not coincide with + the calendar-year. Find the calendar-year and calendar-month associated + with the given year and quarter under the `freq`-derived calendar. 
+ + Parameters + ---------- + year : int + quarter : int + freq : DateOffset + + Returns + ------- + year : int + month : int + + See Also + -------- + Period.qyear + """ + if quarter <= 0 or quarter > 4: + raise ValueError('Quarter must be 1 <= q <= 4') - mnum = MONTH_NUMBERS[get_rule_month(freq)] + 1 - month = (mnum + (quarter - 1) * 3) % 12 + 1 - if month > mnum: - year -= 1 + mnum = MONTH_NUMBERS[get_rule_month(freq)] + 1 + month = (mnum + (quarter - 1) * 3) % 12 + 1 + if month > mnum: + year -= 1 return year, month diff --git a/pandas/core/arrays/__init__.py b/pandas/core/arrays/__init__.py index 1b8a43d4293a5..6ccbb872bf50e 100644 --- a/pandas/core/arrays/__init__.py +++ b/pandas/core/arrays/__init__.py @@ -3,4 +3,4 @@ from .categorical import Categorical # noqa from .datetimes import DatetimeArrayMixin # noqa from .period import PeriodArrayMixin # noqa -from .timedelta import TimedeltaArrayMixin # noqa +from .timedeltas import TimedeltaArrayMixin # noqa diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 65f34b847f8d0..ec430e4bf17b1 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -10,19 +10,53 @@ DIFFERENT_FREQ_INDEX, IncompatibleFrequency) from pandas.errors import NullFrequencyError, PerformanceWarning +from pandas import compat from pandas.tseries import frequencies from pandas.tseries.offsets import Tick from pandas.core.dtypes.common import ( + needs_i8_conversion, + is_list_like, + is_bool_dtype, is_period_dtype, is_timedelta64_dtype, is_object_dtype) +from pandas.core.dtypes.generic import ABCSeries, ABCDataFrame, ABCIndexClass import pandas.core.common as com from pandas.core.algorithms import checked_add_with_arr +def _make_comparison_op(op, cls): + # TODO: share code with indexes.base version? Main difference is that + # the block for MultiIndex was removed here. 
+ def cmp_method(self, other): + if isinstance(other, ABCDataFrame): + return NotImplemented + + if isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)): + if other.ndim > 0 and len(self) != len(other): + raise ValueError('Lengths must match to compare') + + if needs_i8_conversion(self) and needs_i8_conversion(other): + # we may need to directly compare underlying + # representations + return self._evaluate_compare(other, op) + + # numpy will show a DeprecationWarning on invalid elementwise + # comparisons, this will raise in the future + with warnings.catch_warnings(record=True): + with np.errstate(all='ignore'): + result = op(self.values, np.asarray(other)) + + return result + + name = '__{name}__'.format(name=op.__name__) + # TODO: docstring? + return compat.set_function_name(cmp_method, name, cls) + + class AttributesMixin(object): @property @@ -435,3 +469,85 @@ def _addsub_offset_array(self, other, op): if not is_period_dtype(self): kwargs['freq'] = 'infer' return type(self)(res_values, **kwargs) + + # -------------------------------------------------------------- + # Comparison Methods + + def _evaluate_compare(self, other, op): + """ + We have been called because a comparison between + 8 aware arrays. 
numpy >= 1.11 will + now warn about NaT comparisons + """ + # Called by comparison methods when comparing datetimelike + # with datetimelike + + if not isinstance(other, type(self)): + # coerce to a similar object + if not is_list_like(other): + # scalar + other = [other] + elif lib.is_scalar(lib.item_from_zerodim(other)): + # ndarray scalar + other = [other.item()] + other = type(self)(other) + + # compare + result = op(self.asi8, other.asi8) + + # technically we could support bool dtyped Index + # for now just return the indexing array directly + mask = (self._isnan) | (other._isnan) + + filler = iNaT + if is_bool_dtype(result): + filler = False + + result[mask] = filler + return result + + # TODO: get this from ExtensionOpsMixin + @classmethod + def _add_comparison_methods(cls): + """ add in comparison methods """ + # DatetimeArray and TimedeltaArray comparison methods will + # call these as their super(...) methods + cls.__eq__ = _make_comparison_op(operator.eq, cls) + cls.__ne__ = _make_comparison_op(operator.ne, cls) + cls.__lt__ = _make_comparison_op(operator.lt, cls) + cls.__gt__ = _make_comparison_op(operator.gt, cls) + cls.__le__ = _make_comparison_op(operator.le, cls) + cls.__ge__ = _make_comparison_op(operator.ge, cls) + + +DatetimeLikeArrayMixin._add_comparison_methods() + + +# ------------------------------------------------------------------- +# Shared Constructor Helpers + +def validate_periods(periods): + """ + If a `periods` argument is passed to the Datetime/Timedelta Array/Index + constructor, cast it to an integer. 
+ + Parameters ---------- + periods : None, float, int + + Returns ------- + periods : None or int + + Raises ------ + TypeError + if periods is not None, float, or int + """ + if periods is not None: + if lib.is_float(periods): + periods = int(periods) + elif not lib.is_integer(periods): + raise TypeError('periods must be a number, got {periods}' + .format(periods=periods)) + return periods diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d7dfa73c53d8d..5835090e25de1 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -13,21 +13,37 @@ from pandas.util._decorators import cache_readonly from pandas.errors import PerformanceWarning +from pandas import compat from pandas.core.dtypes.common import ( _NS_DTYPE, + is_datetimelike, is_datetime64tz_dtype, is_datetime64_dtype, is_timedelta64_dtype, _ensure_int64) from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.missing import isna +from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries +import pandas.core.common as com from pandas.core.algorithms import checked_add_with_arr from pandas.tseries.frequencies import to_offset, DateOffset from pandas.tseries.offsets import Tick -from .datetimelike import DatetimeLikeArrayMixin +from pandas.core.arrays import datetimelike as dtl + + +def _to_m8(key, tz=None): + """ + Timestamp-like => dt64 + """ + if not isinstance(key, Timestamp): + # this also converts strings + key = Timestamp(key, tz=tz) + + return np.int64(conversion.pydt_to_i8(key)).view(_NS_DTYPE) def _field_accessor(name, field, docstring=None): @@ -68,7 +84,58 @@ def f(self): return property(f) -class DatetimeArrayMixin(DatetimeLikeArrayMixin): +def _dt_array_cmp(opname, cls): + """ + Wrap comparison operations to convert datetime-like to datetime64 + """ + nat_result = True if opname == '__ne__' else False + + def wrapper(self, other): + meth = getattr(dtl.DatetimeLikeArrayMixin, opname) + + if
isinstance(other, (datetime, np.datetime64, compat.string_types)): + if isinstance(other, datetime): + # GH#18435 strings get a pass from tzawareness compat + self._assert_tzawareness_compat(other) + + other = _to_m8(other, tz=self.tz) + result = meth(self, other) + if isna(other): + result.fill(nat_result) + else: + if isinstance(other, list): + other = type(self)(other) + elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)): + # Following Timestamp convention, __eq__ is all-False + # and __ne__ is all True, others raise TypeError. + if opname == '__eq__': + return np.zeros(shape=self.shape, dtype=bool) + elif opname == '__ne__': + return np.ones(shape=self.shape, dtype=bool) + raise TypeError('%s type object %s' % + (type(other), str(other))) + + if is_datetimelike(other): + self._assert_tzawareness_compat(other) + + result = meth(self, np.asarray(other)) + result = com._values_from_object(result) + + # Make sure to pass an array to result[...]; indexing with + # Series breaks with older version of numpy + o_mask = np.array(isna(other)) + if o_mask.any(): + result[o_mask] = nat_result + + if self.hasnans: + result[self._isnan] = nat_result + + return result + + return compat.set_function_name(wrapper, opname, cls) + + +class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin): """ Assumes that subclass __new__/__init__ defines: tz @@ -222,6 +289,18 @@ def __iter__(self): # ----------------------------------------------------------------- # Comparison Methods + @classmethod + def _add_comparison_methods(cls): + """add in comparison methods""" + cls.__eq__ = _dt_array_cmp('__eq__', cls) + cls.__ne__ = _dt_array_cmp('__ne__', cls) + cls.__lt__ = _dt_array_cmp('__lt__', cls) + cls.__gt__ = _dt_array_cmp('__gt__', cls) + cls.__le__ = _dt_array_cmp('__le__', cls) + cls.__ge__ = _dt_array_cmp('__ge__', cls) + # TODO: Some classes pass __eq__ while others pass operator.eq; + # standardize this. 
+ def _has_same_tz(self, other): zzone = self._timezone @@ -335,7 +414,7 @@ def _add_delta(self, delta): The result's name is set outside of _add_delta by the calling method (__add__ or __sub__) """ - from pandas.core.arrays.timedelta import TimedeltaArrayMixin + from pandas.core.arrays.timedeltas import TimedeltaArrayMixin if isinstance(delta, (Tick, timedelta, np.timedelta64)): new_values = self._add_delta_td(delta) @@ -1021,3 +1100,6 @@ def to_julian_date(self): self.microsecond / 3600.0 / 1e+6 + self.nanosecond / 3600.0 / 1e+9 ) / 24.0) + + +DatetimeArrayMixin._add_comparison_methods() diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 000775361061e..66b1fb8db25c0 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -8,7 +8,7 @@ from pandas._libs.tslib import NaT, iNaT from pandas._libs.tslibs.period import ( Period, IncompatibleFrequency, DIFFERENT_FREQ_INDEX, - get_period_field_arr, period_asfreq_arr, _quarter_to_myear) + get_period_field_arr, period_asfreq_arr) from pandas._libs.tslibs import period as libperiod from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds from pandas._libs.tslibs.fields import isleapyear_arr @@ -26,7 +26,7 @@ from pandas.tseries import frequencies from pandas.tseries.offsets import Tick, DateOffset -from .datetimelike import DatetimeLikeArrayMixin +from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin def _field_accessor(name, alias, docstring=None): @@ -466,7 +466,7 @@ def _range_from_fields(year=None, month=None, quarter=None, day=None, year, quarter = _make_field_arrays(year, quarter) for y, q in compat.zip(year, quarter): - y, m = _quarter_to_myear(y, q, freq) + y, m = libperiod.quarter_to_myear(y, q, freq) val = libperiod.period_ordinal(y, m, 1, 1, 1, 1, 0, 0, base) ordinals.append(val) else: diff --git a/pandas/core/arrays/timedelta.py b/pandas/core/arrays/timedeltas.py similarity index 81% rename from pandas/core/arrays/timedelta.py rename to 
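pandas/core/arrays/timedeltas.py index dbd481aae4f37..f027b84506164 100644 --- a/pandas/core/arrays/timedelta.py +++ b/pandas/core/arrays/timedeltas.py @@ -3,7 +3,7 @@ import numpy as np -from pandas._libs import tslibs, lib +from pandas._libs import tslibs from pandas._libs.tslibs import Timedelta, NaT from pandas._libs.tslibs.fields import get_timedelta_field from pandas._libs.tslibs.timedeltas import array_to_timedelta64 @@ -11,7 +11,7 @@ from pandas import compat from pandas.core.dtypes.common import ( - _TD_DTYPE, _ensure_int64, is_timedelta64_dtype) + _TD_DTYPE, _ensure_int64, is_timedelta64_dtype, is_list_like) from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna @@ -20,7 +20,19 @@ from pandas.tseries.offsets import Tick, DateOffset from pandas.tseries.frequencies import to_offset -from .datetimelike import DatetimeLikeArrayMixin +from . import datetimelike as dtl + + +def _to_m8(key): + """ + Timedelta-like => td64 + """ + if not isinstance(key, Timedelta): + # this also converts strings + key = Timedelta(key) + + # return a type that can be compared + return np.int64(key.value).view(_TD_DTYPE) def _is_convertible_to_td(key): @@ -42,7 +54,47 @@ def f(self): return property(f) -class TimedeltaArrayMixin(DatetimeLikeArrayMixin): +def _td_array_cmp(opname, cls): + """ + Wrap comparison operations to convert timedelta-like to timedelta64 + """ + nat_result = True if opname == '__ne__' else False + + def wrapper(self, other): + msg = "cannot compare a {cls} with type {typ}" + meth = getattr(dtl.DatetimeLikeArrayMixin, opname) + if _is_convertible_to_td(other) or other is NaT: + try: + other = _to_m8(other) + except ValueError: + # failed to parse as timedelta + raise TypeError(msg.format(cls=type(self).__name__, + typ=type(other).__name__)) + result = meth(self, other) + if isna(other): + result.fill(nat_result) + + elif not is_list_like(other): + raise TypeError(msg.format(cls=type(self).__name__, + typ=type(other).__name__)) + else: + other = type(self)(other).values + result = meth(self, other) + result = com._values_from_object(result) + + o_mask = np.array(isna(other)) + if o_mask.any(): + result[o_mask] = nat_result + + if self.hasnans: + result[self._isnan] = nat_result + + return result + + return compat.set_function_name(wrapper, opname, cls) + + +class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin): @property def _box_func(self): return lambda x: Timedelta(x, unit='ns') @@ -78,20 +130,15 @@ def __new__(cls, values, freq=None, start=None, end=None, periods=None, freq != 'infer'): freq = to_offset(freq) - if periods is not None: - if lib.is_float(periods): - periods = int(periods) - elif not lib.is_integer(periods): - raise TypeError('`periods` must be a number, got {periods}' - .format(periods=periods)) + periods = dtl.validate_periods(periods) if values is None: if freq is None and com._any_none(periods, start, end): raise ValueError('Must provide freq argument if no data is ' 'supplied') else: - return cls._generate(start, end, periods, freq, - closed=closed) + return cls._generate_range(start, end, periods, freq, + closed=closed) result = cls._simple_new(values, freq=freq) if freq == 'infer': @@ -102,7 +149,7 @@ def __new__(cls, values, freq=None, start=None, end=None, periods=None, return result @classmethod - def _generate(cls, start, end, periods, freq, closed=None, **kwargs): + def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs): # **kwargs are for compat with TimedeltaIndex, which includes `name` if com._count_not_none(start, end, periods, freq) != 3: raise ValueError('Of the four parameters: start, end, periods, ' @@ -219,6 +266,19 @@ def _evaluate_with_timedelta_like(self, other, op): return NotImplemented + # ---------------------------------------------------------------- + # Comparison Methods + + @classmethod + def _add_comparison_methods(cls): + """add in comparison methods""" + cls.__eq__ =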
typ=type(other).__name__)) + else: + other = type(self)(other).values + result = meth(self, other) + result = com._values_from_object(result) + + o_mask = np.array(isna(other)) + if o_mask.any(): + result[o_mask] = nat_result + + if self.hasnans: + result[self._isnan] = nat_result + + return result + + return compat.set_function_name(wrapper, opname, cls) + + +class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin): @property def _box_func(self): return lambda x: Timedelta(x, unit='ns') @@ -78,20 +130,15 @@ def __new__(cls, values, freq=None, start=None, end=None, periods=None, freq != 'infer'): freq = to_offset(freq) - if periods is not None: - if lib.is_float(periods): - periods = int(periods) - elif not lib.is_integer(periods): - raise TypeError('`periods` must be a number, got {periods}' - .format(periods=periods)) + periods = dtl.validate_periods(periods) if values is None: if freq is None and com._any_none(periods, start, end): raise ValueError('Must provide freq argument if no data is ' 'supplied') else: - return cls._generate(start, end, periods, freq, - closed=closed) + return cls._generate_range(start, end, periods, freq, + closed=closed) result = cls._simple_new(values, freq=freq) if freq == 'infer': @@ -102,7 +149,7 @@ def __new__(cls, values, freq=None, start=None, end=None, periods=None, return result @classmethod - def _generate(cls, start, end, periods, freq, closed=None, **kwargs): + def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs): # **kwargs are for compat with TimedeltaIndex, which includes `name` if com._count_not_none(start, end, periods, freq) != 3: raise ValueError('Of the four parameters: start, end, periods, ' @@ -219,6 +266,19 @@ def _evaluate_with_timedelta_like(self, other, op): return NotImplemented + # ---------------------------------------------------------------- + # Comparison Methods + + @classmethod + def _add_comparison_methods(cls): + """add in comparison methods""" + cls.__eq__ = 
_td_array_cmp('__eq__', cls) + cls.__ne__ = _td_array_cmp('__ne__', cls) + cls.__lt__ = _td_array_cmp('__lt__', cls) + cls.__gt__ = _td_array_cmp('__gt__', cls) + cls.__le__ = _td_array_cmp('__le__', cls) + cls.__ge__ = _td_array_cmp('__ge__', cls) + # ---------------------------------------------------------------- # Conversion Methods - Vectorized analogues of Timedelta methods @@ -332,6 +392,9 @@ def f(x): return result +TimedeltaArrayMixin._add_comparison_methods() + + # --------------------------------------------------------------------- # Constructor Helpers diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index a0456630c9a0f..ed416c3ef857d 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -285,7 +285,9 @@ def is_list_like(obj): """ return (isinstance(obj, Iterable) and + # we do not count strings/unicode/bytes as list-like not isinstance(obj, string_and_binary_types) and + # exclude zero-dimensional numpy arrays, effectively scalars not (isinstance(obj, np.ndarray) and obj.ndim == 0)) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 78fa6f8217157..419e543ae8044 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -45,7 +45,6 @@ is_datetime64tz_dtype, is_timedelta64_dtype, is_hashable, - needs_i8_conversion, is_iterator, is_list_like, is_scalar) @@ -87,11 +86,6 @@ def cmp_method(self, other): if other.ndim > 0 and len(self) != len(other): raise ValueError('Lengths must match to compare') - # we may need to directly compare underlying - # representations - if needs_i8_conversion(self) and needs_i8_conversion(other): - return self._evaluate_compare(other, op) - from .multi import MultiIndex if is_object_dtype(self) and not isinstance(self, MultiIndex): # don't pass MultiIndex @@ -4628,9 +4622,6 @@ def _evaluate_with_timedelta_like(self, other, op): def _evaluate_with_datetime_like(self, other, op): raise TypeError("can only perform ops with 
datetime like values") - def _evaluate_compare(self, other, op): - raise com.AbstractMethodError(self) - @classmethod def _add_comparison_methods(cls): """ add in comparison methods """ diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 37e20496aafce..3f0bdf18f7230 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -223,7 +223,7 @@ def _validate_frequency(cls, index, freq, **kwargs): if index.empty or inferred == freq.freqstr: return None - on_freq = cls._generate( + on_freq = cls._generate_range( index[0], None, len(index), None, freq, **kwargs) if not np.array_equal(index.asi8, on_freq.asi8): msg = ('Inferred frequency {infer} from passed values does not ' @@ -290,34 +290,11 @@ def wrapper(left, right): return wrapper + @Appender(DatetimeLikeArrayMixin._evaluate_compare.__doc__) def _evaluate_compare(self, other, op): - """ - We have been called because a comparison between - 8 aware arrays. numpy >= 1.11 will - now warn about NaT comparisons - """ - - # coerce to a similar object - if not isinstance(other, type(self)): - if not is_list_like(other): - # scalar - other = [other] - elif is_scalar(lib.item_from_zerodim(other)): - # ndarray scalar - other = [other.item()] - other = type(self)(other) - - # compare - result = op(self.asi8, other.asi8) - - # technically we could support bool dtyped Index - # for now just return the indexing array directly - mask = (self._isnan) | (other._isnan) + result = DatetimeLikeArrayMixin._evaluate_compare(self, other, op) if is_bool_dtype(result): - result[mask] = False return result - - result[mask] = iNaT try: return Index(result) except TypeError: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 4931610e652b6..4732178d552be 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -18,7 +18,7 @@ is_integer, is_float, is_integer_dtype, - is_datetime64_ns_dtype, is_datetimelike, 
+ is_datetime64_ns_dtype, is_period_dtype, is_bool_dtype, is_string_like, @@ -31,7 +31,8 @@ from pandas.core.dtypes.missing import isna import pandas.core.dtypes.concat as _concat -from pandas.core.arrays.datetimes import DatetimeArrayMixin +from pandas.core.arrays.datetimes import DatetimeArrayMixin, _to_m8 +from pandas.core.arrays import datetimelike as dtl from pandas.core.indexes.base import Index, _index_shared_docs from pandas.core.indexes.numeric import Int64Index, Float64Index @@ -87,49 +88,8 @@ def _dt_index_cmp(opname, cls): """ Wrap comparison operations to convert datetime-like to datetime64 """ - nat_result = True if opname == '__ne__' else False - def wrapper(self, other): - func = getattr(super(DatetimeIndex, self), opname) - - if isinstance(other, (datetime, np.datetime64, compat.string_types)): - if isinstance(other, datetime): - # GH#18435 strings get a pass from tzawareness compat - self._assert_tzawareness_compat(other) - - other = _to_m8(other, tz=self.tz) - result = func(other) - if isna(other): - result.fill(nat_result) - else: - if isinstance(other, list): - other = DatetimeIndex(other) - elif not isinstance(other, (np.ndarray, Index, ABCSeries)): - # Following Timestamp convention, __eq__ is all-False - # and __ne__ is all True, others raise TypeError. 
- if opname == '__eq__': - return np.zeros(shape=self.shape, dtype=bool) - elif opname == '__ne__': - return np.ones(shape=self.shape, dtype=bool) - raise TypeError('%s type object %s' % - (type(other), str(other))) - - if is_datetimelike(other): - self._assert_tzawareness_compat(other) - - result = func(np.asarray(other)) - result = com._values_from_object(result) - - # Make sure to pass an array to result[...]; indexing with - # Series breaks with older version of numpy - o_mask = np.array(isna(other)) - if o_mask.any(): - result[o_mask] = nat_result - - if self.hasnans: - result[self._isnan] = nat_result - - # support of bool dtype indexers + result = getattr(DatetimeArrayMixin, opname)(self, other) if is_bool_dtype(result): return result return Index(result) @@ -339,12 +299,7 @@ def __new__(cls, data=None, freq_infer = True freq = None - if periods is not None: - if is_float(periods): - periods = int(periods) - elif not is_integer(periods): - msg = 'periods must be a number, got {periods}' - raise TypeError(msg.format(periods=periods)) + periods = dtl.validate_periods(periods) # if dtype has an embedded tz, capture it if dtype is not None: @@ -364,9 +319,9 @@ def __new__(cls, data=None, msg = 'Must provide freq argument if no data is supplied' raise ValueError(msg) else: - return cls._generate(start, end, periods, name, freq, tz=tz, - normalize=normalize, closed=closed, - ambiguous=ambiguous) + return cls._generate_range(start, end, periods, name, freq, + tz=tz, normalize=normalize, + closed=closed, ambiguous=ambiguous) if not isinstance(data, (np.ndarray, Index, ABCSeries)): if is_scalar(data): @@ -438,8 +393,8 @@ def __new__(cls, data=None, return subarr._deepcopy_if_needed(ref_to_data, copy) @classmethod - def _generate(cls, start, end, periods, name, freq, - tz=None, normalize=False, ambiguous='raise', closed=None): + def _generate_range(cls, start, end, periods, name, freq, tz=None, + normalize=False, ambiguous='raise', closed=None): if 
com._count_not_none(start, end, periods, freq) != 3: raise ValueError('Of the four parameters: start, end, periods, ' 'and freq, exactly three must be specified') @@ -521,7 +476,7 @@ def _generate(cls, start, end, periods, name, freq, index = cls._cached_range(start, end, periods=periods, freq=freq, name=name) else: - index = _generate_regular_range(start, end, periods, freq) + index = _generate_regular_range(cls, start, end, periods, freq) else: @@ -545,14 +500,15 @@ def _generate(cls, start, end, periods, name, freq, index = cls._cached_range(start, end, periods=periods, freq=freq, name=name) else: - index = _generate_regular_range(start, end, periods, freq) + index = _generate_regular_range(cls, start, end, + periods, freq) if tz is not None and getattr(index, 'tz', None) is None: arr = conversion.tz_localize_to_utc(_ensure_int64(index), tz, ambiguous=ambiguous) - index = DatetimeIndex(arr) + index = cls(arr) # index is localized datetime64 array -> have to convert # start/end as well to compare @@ -1764,7 +1720,7 @@ def to_julian_date(self): DatetimeIndex._add_datetimelike_methods() -def _generate_regular_range(start, end, periods, freq): +def _generate_regular_range(cls, start, end, periods, freq): if isinstance(freq, Tick): stride = freq.nanos if periods is None: @@ -1788,7 +1744,8 @@ def _generate_regular_range(start, end, periods, freq): "if a 'period' is given.") data = np.arange(b, e, stride, dtype=np.int64) - data = DatetimeIndex._simple_new(data.view(_NS_DTYPE), None, tz=tz) + # TODO: Do we need to use _simple_new here? just return data.view? 
+ data = cls._simple_new(data.view(_NS_DTYPE), None, tz=tz) else: if isinstance(start, Timestamp): start = start.to_pydatetime() @@ -2088,17 +2045,6 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, closed=closed, **kwargs) -def _to_m8(key, tz=None): - """ - Timestamp-like => dt64 - """ - if not isinstance(key, Timestamp): - # this also converts strings - key = Timestamp(key, tz=tz) - - return np.int64(conversion.pydt_to_i8(key)).view(_NS_DTYPE) - - _CACHE_START = Timestamp(datetime(1950, 1, 1)) _CACHE_END = Timestamp(datetime(2030, 1, 1)) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index eb1171c45b1e5..1ed6145f01a44 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -15,8 +15,10 @@ from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCSeries -from pandas.core.arrays.timedelta import ( - TimedeltaArrayMixin, _is_convertible_to_td) +from pandas.core.arrays.timedeltas import ( + TimedeltaArrayMixin, _is_convertible_to_td, _to_m8) +from pandas.core.arrays import datetimelike as dtl + from pandas.core.indexes.base import Index from pandas.core.indexes.numeric import Int64Index import pandas.compat as compat @@ -53,39 +55,10 @@ def _td_index_cmp(opname, cls): """ Wrap comparison operations to convert timedelta-like to timedelta64 """ - nat_result = True if opname == '__ne__' else False - def wrapper(self, other): - msg = "cannot compare a {cls} with type {typ}" - func = getattr(super(TimedeltaIndex, self), opname) - if _is_convertible_to_td(other) or other is NaT: - try: - other = _to_m8(other) - except ValueError: - # failed to parse as timedelta - raise TypeError(msg.format(cls=type(self).__name__, - typ=type(other).__name__)) - result = func(other) - if isna(other): - result.fill(nat_result) - - elif not is_list_like(other): - raise TypeError(msg.format(cls=type(self).__name__, - typ=type(other).__name__)) - else: - other = 
TimedeltaIndex(other).values - result = func(other) - result = com._values_from_object(result) - - o_mask = np.array(isna(other)) - if o_mask.any(): - result[o_mask] = nat_result - - if self.hasnans: - result[self._isnan] = nat_result - - # support of bool dtype indexers + result = getattr(TimedeltaArrayMixin, opname)(self, other) if is_bool_dtype(result): + # support of bool dtype indexers return result return Index(result) @@ -218,20 +191,15 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, freq_infer = True freq = None - if periods is not None: - if is_float(periods): - periods = int(periods) - elif not is_integer(periods): - msg = 'periods must be a number, got {periods}' - raise TypeError(msg.format(periods=periods)) + periods = dtl.validate_periods(periods) if data is None: if freq is None and com._any_none(periods, start, end): msg = 'Must provide freq argument if no data is supplied' raise ValueError(msg) else: - return cls._generate(start, end, periods, name, freq, - closed=closed) + return cls._generate_range(start, end, periods, name, freq, + closed=closed) if unit is not None: data = to_timedelta(data, unit=unit, box=False) @@ -248,30 +216,28 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, elif copy: data = np.array(data, copy=True) + subarr = cls._simple_new(data, name=name, freq=freq) # check that we are matching freqs - if verify_integrity and len(data) > 0: + if verify_integrity and len(subarr) > 0: if freq is not None and not freq_infer: - index = cls._simple_new(data, name=name) - cls._validate_frequency(index, freq) - index.freq = freq - return index + cls._validate_frequency(subarr, freq) if freq_infer: - index = cls._simple_new(data, name=name) - inferred = index.inferred_freq + inferred = subarr.inferred_freq if inferred: - index.freq = to_offset(inferred) - return index + subarr.freq = to_offset(inferred) + return subarr - return cls._simple_new(data, name=name, freq=freq) + return subarr 
@classmethod - def _generate(cls, start, end, periods, name, freq, closed=None): + def _generate_range(cls, start, end, periods, name, freq, closed=None): # TimedeltaArray gets `name` via **kwargs, so we need to explicitly # override it if name is passed as a positional argument - return super(TimedeltaIndex, cls)._generate(start, end, - periods, freq, - name=name, closed=closed) + return super(TimedeltaIndex, cls)._generate_range(start, end, + periods, freq, + name=name, + closed=closed) @classmethod def _simple_new(cls, values, name=None, freq=None, **kwargs): @@ -797,18 +763,6 @@ def _is_convertible_to_index(other): return False -def _to_m8(key): - """ - Timedelta-like => dt64 - """ - if not isinstance(key, Timedelta): - # this also converts strings - key = Timedelta(key) - - # return an type that can be compared - return np.int64(key.value).view(_TD_DTYPE) - - def timedelta_range(start=None, end=None, periods=None, freq=None, name=None, closed=None): """ diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index d116b3bcff86a..69e802fbaa3f0 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -4,7 +4,7 @@ import pandas as pd from pandas.core.arrays.datetimes import DatetimeArrayMixin -from pandas.core.arrays.timedelta import TimedeltaArrayMixin +from pandas.core.arrays.timedeltas import TimedeltaArrayMixin from pandas.core.arrays.period import PeriodArrayMixin diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index d4ad2e4eeb2e6..387a70fe37253 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -208,8 +208,8 @@ def get_offset(name): raise ValueError(libfreqs.INVALID_FREQ_ERR_MSG.format(name)) # cache _offset_map[name] = offset - # do not return cache because it's mutable - return _offset_map[name].copy() + + return _offset_map[name] getOffset = get_offset