diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index d42be56963569..e9499cd43b267 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -58,10 +58,30 @@ def __call__(self, args, kwargs, fname=None, ARGMINMAX_DEFAULTS = dict(out=None) -validate_argmin = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmin', - method='both', max_fname_arg_count=1) -validate_argmax = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmax', - method='both', max_fname_arg_count=1) +_validate_argmin = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmin', + method='both', max_fname_arg_count=1) +_validate_argmax = CompatValidator(ARGMINMAX_DEFAULTS, fname='argmax', + method='both', max_fname_arg_count=1) + + +def validate_argmin(args, kwargs, axis=None): + _validate_argmin(args, kwargs) + validate_minmax_axis(axis) + + +def validate_argmax(args, kwargs, axis=None): + _validate_argmax(args, kwargs) + validate_minmax_axis(axis) + + +def validate_min(args, kwargs, axis=None): + _validate_min(args, kwargs) + validate_minmax_axis(axis) + + +def validate_max(args, kwargs, axis=None): + _validate_max(args, kwargs) + validate_minmax_axis(axis) def process_skipna(skipna, args): @@ -196,10 +216,10 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method='kwargs') MINMAX_DEFAULTS = dict(out=None) -validate_min = CompatValidator(MINMAX_DEFAULTS, fname='min', - method='both', max_fname_arg_count=1) -validate_max = CompatValidator(MINMAX_DEFAULTS, fname='max', - method='both', max_fname_arg_count=1) +_validate_min = CompatValidator(MINMAX_DEFAULTS, fname='min', + method='both', max_fname_arg_count=1) +_validate_max = CompatValidator(MINMAX_DEFAULTS, fname='max', + method='both', max_fname_arg_count=1) RESHAPE_DEFAULTS = dict(order='C') validate_reshape = CompatValidator(RESHAPE_DEFAULTS, fname='reshape', @@ -360,3 +380,24 @@ def validate_resampler_func(method, args, kwargs): "{func}() instead".format(func=method))) else: raise TypeError("too many arguments passed in") + + +def validate_minmax_axis(axis): + """ + Ensure that the axis argument passed to min, max, argmin, or argmax is + zero or None, as otherwise it will be incorrectly ignored. + + Parameters + ---------- + axis : int or None + + Raises + ------ + ValueError + """ + ndim = 1 # hard-coded for Index + if axis is None: + return + if axis >= ndim or (axis < 0 and ndim + axis < 0): + raise ValueError("`axis` must be fewer than the number of " + "dimensions ({ndim})".format(ndim=ndim)) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index a98b0b3bf35f9..6b9211294d8a3 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -11,6 +11,7 @@ from pandas._libs.tslibs.period import ( Period, DIFFERENT_FREQ_INDEX, IncompatibleFrequency) +from pandas.util._decorators import deprecate_kwarg from pandas.errors import NullFrequencyError, PerformanceWarning from pandas import compat @@ -39,7 +40,6 @@ from pandas.core.algorithms import checked_add_with_arr from .base import ExtensionOpsMixin -from pandas.util._decorators import deprecate_kwarg def _make_comparison_op(cls, op): @@ -143,6 +143,10 @@ def asi8(self): # ------------------------------------------------------------------ # Array-like Methods + @property + def ndim(self): + return len(self.shape) + @property def shape(self): return (len(self),) @@ -151,6 +155,10 @@ def shape(self): def size(self): return np.prod(self.shape) + @property + def nbytes(self): + return self._ndarray_values.nbytes + def __len__(self): return len(self._data) @@ -211,6 +219,10 @@ def astype(self, dtype, copy=True): # ------------------------------------------------------------------ # Null Handling + def isna(self): + # EA Interface + return self._isnan + @property # NB: override with cache_readonly in immutable subclasses def _isnan(self): """ return if each value is nan""" @@ -332,6 +344,10 @@ def _validate_frequency(cls, index, freq, **kwargs): # Frequency validation is not meaningful for Period Array/Index return None + # DatetimeArray may pass `ambiguous`, nothing else will be accepted + # by cls._generate_range below + assert all(key == 'ambiguous' for key in kwargs) + inferred = index.inferred_freq if index.size == 0 or inferred == freq.freqstr: return None @@ -595,9 +611,12 @@ def _time_shift(self, periods, freq=None): start = self[0] + periods * self.freq end = self[-1] + periods * self.freq - attribs = self._get_attributes_dict() + + # Note: in the DatetimeTZ case, _generate_range will infer the + # appropriate timezone from `start` and `end`, so tz does not need + # to be passed explicitly. return self._generate_range(start=start, end=end, periods=None, - **attribs) + freq=self.freq) @classmethod def _add_datetimelike_methods(cls): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 8624ddd8965e8..525ced5827121 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -19,6 +19,7 @@ from pandas.core.dtypes.common import ( is_integer_dtype, is_float_dtype, is_period_dtype, is_timedelta64_dtype, + is_object_dtype, is_datetime64_dtype, _TD_DTYPE) from pandas.core.dtypes.dtypes import PeriodDtype from pandas.core.dtypes.generic import ABCSeries @@ -122,18 +123,30 @@ def freq(self, value): _attributes = ["freq"] - def __new__(cls, values, freq=None, **kwargs): + def __new__(cls, values, freq=None, dtype=None, **kwargs): + + if freq is not None: + # coerce freq to freq object, otherwise it can be coerced + # elementwise, which is slow + freq = Period._maybe_convert_freq(freq) + + freq = dtl.validate_dtype_freq(dtype, freq) + if is_period_dtype(values): # PeriodArray, PeriodIndex - if freq is not None and values.freq != freq: - raise IncompatibleFrequency(freq, values.freq) - freq = values.freq + freq = dtl.validate_dtype_freq(values.dtype, freq) values = values.asi8 elif is_datetime64_dtype(values): - # TODO: what if it has tz? values = dt64arr_to_periodarr(values, freq) + elif is_object_dtype(values) or isinstance(values, (list, tuple)): + # e.g. array([Period(...), Period(...), NaT]) + values = np.array(values, dtype=object) + if freq is None: + freq = libperiod.extract_freq(values) + values = libperiod.extract_ordinals(values, freq) + return cls._simple_new(values, freq=freq, **kwargs) @classmethod @@ -176,11 +189,13 @@ def _from_ordinals(cls, values, freq=None, **kwargs): @classmethod def _generate_range(cls, start, end, periods, freq, fields): + periods = dtl.validate_periods(periods) + if freq is not None: freq = Period._maybe_convert_freq(freq) field_count = len(fields) - if com.count_not_none(start, end) > 0: + if start is not None or end is not None: if field_count > 0: raise ValueError('Can either instantiate from fields ' 'or endpoints, but not both') diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 4904a90ab7b2b..eb7dabdc03b0b 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -126,8 +126,7 @@ def _simple_new(cls, values, freq=None, **kwargs): result._freq = freq return result - def __new__(cls, values, freq=None, start=None, end=None, periods=None, - closed=None): + def __new__(cls, values, freq=None): freq, freq_infer = dtl.maybe_infer_freq(freq) @@ -140,8 +139,7 @@ def __new__(cls, values, freq=None, start=None, end=None, periods=None, return result @classmethod - def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs): - # **kwargs are for compat with TimedeltaIndex, which includes `name` + def _generate_range(cls, start, end, periods, freq, closed=None): periods = dtl.validate_periods(periods) if freq is None and any(x is None for x in [periods, start, end]): @@ -167,10 +165,9 @@ def _generate_range(cls, start, end, periods, freq, closed=None, **kwargs): if freq is not None: index = _generate_regular_range(start, end, periods, freq) - index = cls._simple_new(index, freq=freq, **kwargs) + index = cls._simple_new(index, freq=freq) else: index = np.linspace(start.value, end.value, periods).astype('i8') - # TODO: shouldn't we pass `name` here? (via **kwargs) index = cls._simple_new(index, freq=freq) if not left_closed: diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 8e919ba3599fc..b6cd3067f226b 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -430,7 +430,7 @@ def min(self, axis=None, *args, **kwargs): -------- numpy.ndarray.min """ - nv.validate_min(args, kwargs) + nv.validate_min(args, kwargs, axis=axis) try: i8 = self.asi8 @@ -458,7 +458,7 @@ def argmin(self, axis=None, *args, **kwargs): -------- numpy.ndarray.argmin """ - nv.validate_argmin(args, kwargs) + nv.validate_argmin(args, kwargs, axis=axis) i8 = self.asi8 if self.hasnans: @@ -478,7 +478,7 @@ def max(self, axis=None, *args, **kwargs): -------- numpy.ndarray.max """ - nv.validate_max(args, kwargs) + nv.validate_max(args, kwargs, axis=axis) try: i8 = self.asi8 @@ -506,7 +506,7 @@ def argmax(self, axis=None, *args, **kwargs): -------- numpy.ndarray.argmax """ - nv.validate_argmax(args, kwargs) + nv.validate_argmax(args, kwargs, axis=axis) i8 = self.asi8 if self.hasnans: @@ -699,6 +699,12 @@ def astype(self, dtype, copy=True): raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy) + @Appender(DatetimeLikeArrayMixin._time_shift.__doc__) + def _time_shift(self, periods, freq=None): + result = DatetimeLikeArrayMixin._time_shift(self, periods, freq=freq) + result.name = self.name + return result + def _ensure_datetimelike_to_i8(other, to_utc=False): """ diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e0219acc115b5..c9107d6509848 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -241,9 +241,11 @@ def __new__(cls, data=None, if data is None: # TODO: Remove this block and associated kwargs; GH#20535 - return cls._generate_range(start, end, periods, name, freq, - tz=tz, normalize=normalize, - closed=closed, ambiguous=ambiguous) + result = cls._generate_range(start, end, periods, + freq=freq, tz=tz, normalize=normalize, + closed=closed, ambiguous=ambiguous) + result.name = name + return result if not isinstance(data, (np.ndarray, Index, ABCSeries, DatetimeArrayMixin)): @@ -315,17 +317,6 @@ def __new__(cls, data=None, return subarr._deepcopy_if_needed(ref_to_data, copy) - @classmethod - @Appender(DatetimeArrayMixin._generate_range.__doc__) - def _generate_range(cls, start, end, periods, name=None, freq=None, - tz=None, normalize=False, ambiguous='raise', - closed=None): - out = super(DatetimeIndex, cls)._generate_range( - start, end, periods, freq, - tz=tz, normalize=normalize, ambiguous=ambiguous, closed=closed) - out.name = name - return out - def _convert_for_op(self, value): """ Convert value to be insertable to ndarray """ if self._has_same_tz(value): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index f452a57e82725..ef88ef9ccb624 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -165,8 +165,6 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, raise TypeError('__new__() got an unexpected keyword argument {}'. format(list(set(fields) - valid_field_set)[0])) - periods = dtl.validate_periods(periods) - if name is None and hasattr(data, 'name'): name = data.name diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 56b6dc7051d9f..78fffb0f92958 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -147,12 +147,10 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, if data is None: # TODO: Remove this block and associated kwargs; GH#20535 - if freq is None and com._any_none(periods, start, end): - raise ValueError('Must provide freq argument if no data is ' - 'supplied') - periods = dtl.validate_periods(periods) - return cls._generate_range(start, end, periods, name, freq, - closed=closed) + out = cls._generate_range(start, end, periods, + freq=freq, closed=closed) + out.name = name + return out if unit is not None: data = to_timedelta(data, unit=unit, box=False) @@ -181,16 +179,6 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None, return subarr - @classmethod - def _generate_range(cls, start, end, periods, - name=None, freq=None, closed=None): - # TimedeltaArray gets `name` via **kwargs, so we need to explicitly - # override it if name is passed as a positional argument - return super(TimedeltaIndex, cls)._generate_range(start, end, - periods, freq, - name=name, - closed=closed) - @classmethod def _simple_new(cls, values, name=None, freq=None, **kwargs): result = super(TimedeltaIndex, cls)._simple_new(values, freq, **kwargs) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9cceff30c9e0e..ce219ddf89e75 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2476,8 +2476,7 @@ def _get_index_factory(self, klass): if klass == DatetimeIndex: def f(values, freq=None, tz=None): # data are already in UTC, localize and convert if tz present - result = DatetimeIndex._simple_new(values.values, name=None, - freq=freq) + result = DatetimeIndex(values.values, name=None, freq=freq) if tz is not None: result = result.tz_localize('UTC').tz_convert(tz) return result diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 6bb4241451b3f..93cab19297fc2 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -34,7 +34,7 @@ def datetime_index(request): A fixture to provide DatetimeIndex objects with different frequencies. Most DatetimeArray behavior is already tested in DatetimeIndex tests, - so here we just test that the DatetimeIndex behavior matches + so here we just test that the DatetimeArray behavior matches the DatetimeIndex behavior. """ freqstr = request.param @@ -45,6 +45,19 @@ def datetime_index(request): return pi +@pytest.fixture +def timedelta_index(request): + """ + A fixture to provide TimedeltaIndex objects with different frequencies. + + Most TimedeltaArray behavior is already tested in TimedeltaIndex tests, + so here we just test that the TimedeltaArray behavior matches + the TimedeltaIndex behavior. + """ + # TODO: flesh this out + return pd.TimedeltaIndex(['1 Day', '3 Hours', 'NaT']) + + class TestDatetimeArray(object): def test_from_dti(self, tz_naive_fixture): @@ -122,9 +135,51 @@ def test_astype_object(self): assert asobj.dtype == 'O' assert list(asobj) == list(tdi) + def test_to_pytimedelta(self, timedelta_index): + tdi = timedelta_index + arr = TimedeltaArrayMixin(tdi) + + expected = tdi.to_pytimedelta() + result = arr.to_pytimedelta() + + tm.assert_numpy_array_equal(result, expected) + + def test_total_seconds(self, timedelta_index): + tdi = timedelta_index + arr = TimedeltaArrayMixin(tdi) + + expected = tdi.total_seconds() + result = arr.total_seconds() + + tm.assert_numpy_array_equal(result, expected.values) + + @pytest.mark.parametrize('propname', pd.TimedeltaIndex._field_ops) + def test_int_properties(self, timedelta_index, propname): + tdi = timedelta_index + arr = TimedeltaArrayMixin(tdi) + + result = getattr(arr, propname) + expected = np.array(getattr(tdi, propname), dtype=result.dtype) + + tm.assert_numpy_array_equal(result, expected) + class TestPeriodArray(object): + def test_from_object_dtype(self, period_index): + pi = period_index + arr = PeriodArrayMixin(pd.Index(pi, dtype=object)) + assert list(arr) == list(pi) + + arr = PeriodArrayMixin(np.array(pi, dtype=object)) + assert list(arr) == list(pi) + + arr = PeriodArrayMixin(list(pi)) + assert list(arr) == list(pi) + + arr = PeriodArrayMixin(tuple(pi)) + assert list(arr) == list(pi) + def test_from_pi(self, period_index): pi = period_index arr = PeriodArrayMixin(pi) diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index e32e18ea0ec4a..7af8b259fa137 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -8,6 +8,18 @@ class DatetimeLike(Base): + def test_argmax_axis_invalid(self): + # GH#23081 + rng = self.create_index() + with pytest.raises(ValueError): + rng.argmax(axis=1) + with pytest.raises(ValueError): + rng.argmin(axis=2) + with pytest.raises(ValueError): + rng.min(axis=-2) + with pytest.raises(ValueError): + rng.max(axis=-3) + def test_can_hold_identifiers(self): idx = self.create_index() key = idx[0]