From d7637c9f3a5da88e0cb7b2a1cca8b476f8878ddc Mon Sep 17 00:00:00 2001
From: sinhrks
Date: Sat, 13 Aug 2016 16:27:33 +0900
Subject: [PATCH] API: PeriodIndex.values now returns array of Period objects

---
 doc/source/whatsnew/v0.19.0.txt           |  26 ++++-
 pandas/indexes/base.py                    |  27 +++--
 pandas/io/pytables.py                     |  24 ++--
 pandas/tests/indexes/common.py            |  15 ++-
 pandas/tests/indexes/test_datetimelike.py |   2 +-
 pandas/tests/indexing/test_coercion.py    |  28 +++--
 pandas/tests/indexing/test_indexing.py    |   4 +-
 pandas/tests/test_base.py                 |   2 +-
 pandas/tseries/base.py                    |   2 +-
 pandas/tseries/converter.py               |   6 +-
 pandas/tseries/period.py                  | 128 ++++++++++++++--------
 pandas/tseries/resample.py                |   5 +-
 pandas/tseries/tests/test_base.py         |   4 +-
 pandas/tseries/tests/test_period.py       |  55 +++++++---
 14 files changed, 219 insertions(+), 109 deletions(-)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 222bd250034d8..8186719bfb28b 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -16,7 +16,7 @@ Highlights include:
 - :func:`merge_asof` for asof-style time-series joining, see :ref:`here `
 - ``.rolling()`` are now time-series aware, see :ref:`here `
 - pandas development api, see :ref:`here `
-- ``PeriodIndex`` now has its own ``period`` dtype. see ref:`here `
+- ``PeriodIndex`` now has its own ``period`` dtype, and has been changed to be more consistent with other ``Index`` classes. See :ref:`here `
 
 .. contents:: What's new in v0.19.0
     :local:
@@ -643,10 +643,13 @@ Furthermore:
 - Passing duplicated ``percentiles`` will now raise a ``ValueError``.
 - Bug in ``.describe()`` on a DataFrame with a mixed-dtype column index, which would previously raise a ``TypeError`` (:issue:`13288`)
 
-.. _whatsnew_0190.api.perioddtype:
+.. _whatsnew_0190.api.period:
+
+``Period`` changes
+^^^^^^^^^^^^^^^^^^
 
 ``PeriodIndex`` now has ``period`` dtype
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+""""""""""""""""""""""""""""""""""""""""
 
 ``PeriodIndex`` now has its own ``period`` dtype. The ``period`` dtype is a
 pandas extension dtype like ``category`` or :ref:`timezone aware dtype ` (``datetime64[ns, tz]``). (:issue:`13941`).
@@ -681,7 +684,7 @@ New Behavior:
 .. _whatsnew_0190.api.periodnat:
 
 ``Period('NaT')`` now returns ``pd.NaT``
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+""""""""""""""""""""""""""""""""""""""""
 
 Previously, ``Period`` has its own ``Period('NaT')`` representation different from
 ``pd.NaT``. Now ``Period('NaT')`` has been changed to return ``pd.NaT``. (:issue:`12759`, :issue:`13582`)
@@ -719,6 +722,22 @@ New Behavior:
 
     pd.NaT + 1
     pd.NaT - 1
 
+``PeriodIndex.values`` now returns an array of ``Period`` objects
+""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+
+``.values`` is changed to return an array of ``Period`` objects, rather than
+an array of ``int64`` (:issue:`13988`)
+
+.. code-block:: ipython
+
+    In [6]: pi = pd.PeriodIndex(['2011-01', '2011-02'], freq='M')
+    In [7]: pi.values
+    array([492, 493])
+
+.. ipython:: python
+
+    pi = pd.PeriodIndex(['2011-01', '2011-02'], freq='M')
+    pi.values
 
 ..
_whatsnew_0190.api.difference: diff --git a/pandas/indexes/base.py b/pandas/indexes/base.py index e4e5a4e4cfec7..49b16ec9a71ab 100644 --- a/pandas/indexes/base.py +++ b/pandas/indexes/base.py @@ -1251,7 +1251,7 @@ def _constructor(self): @cache_readonly def _engine(self): # property, for now, slow to look up - return self._engine_type(lambda: self.values, len(self)) + return self._engine_type(lambda: self._values, len(self)) def _validate_index_level(self, level): """ @@ -1823,13 +1823,13 @@ def union(self, other): if self.is_monotonic and other.is_monotonic: try: - result = self._outer_indexer(self.values, other._values)[0] + result = self._outer_indexer(self._values, other._values)[0] except TypeError: # incomparable objects - result = list(self.values) + result = list(self._values) # worth making this faster? a very unusual case - value_set = set(self.values) + value_set = set(self._values) result.extend([x for x in other._values if x not in value_set]) else: indexer = self.get_indexer(other) @@ -1838,10 +1838,10 @@ def union(self, other): if len(indexer) > 0: other_diff = algos.take_nd(other._values, indexer, allow_fill=False) - result = _concat._concat_compat((self.values, other_diff)) + result = _concat._concat_compat((self._values, other_diff)) try: - self.values[0] < other_diff[0] + self._values[0] < other_diff[0] except TypeError as e: warnings.warn("%s, sort order is undefined for " "incomparable objects" % e, RuntimeWarning, @@ -1853,7 +1853,7 @@ def union(self, other): result.sort() else: - result = self.values + result = self._values try: result = np.sort(result) @@ -1906,17 +1906,17 @@ def intersection(self, other): if self.is_monotonic and other.is_monotonic: try: - result = self._inner_indexer(self.values, other._values)[0] + result = self._inner_indexer(self._values, other._values)[0] return self._wrap_union_result(other, result) except TypeError: pass try: - indexer = Index(self.values).get_indexer(other._values) + indexer = Index(self._values).get_indexer(other._values) indexer = indexer.take((indexer != -1).nonzero()[0]) except: # duplicates - indexer = Index(self.values).get_indexer_non_unique( + indexer = Index(self._values).get_indexer_non_unique( other._values)[0].unique() indexer = indexer[indexer != -1] @@ -2536,7 +2536,7 @@ def _reindex_non_unique(self, target): missing = _ensure_platform_int(missing) missing_labels = target.take(missing) missing_indexer = _ensure_int64(l[~check]) - cur_labels = self.take(indexer[check])._values + cur_labels = self.take(indexer[check]).values cur_indexer = _ensure_int64(l[check]) new_labels = np.empty(tuple([len(indexer)]), dtype=object) @@ -2556,7 +2556,7 @@ def _reindex_non_unique(self, target): else: # need to retake to have the same size as the indexer - indexer = indexer._values + indexer = indexer.values indexer[~check] = 0 # reset the new indexer to account for the new size @@ -2879,7 +2879,7 @@ def _join_monotonic(self, other, how='left', return_indexers=False): else: return ret_index - sv = self.values + sv = self._values ov = other._values if self.is_unique and other.is_unique: @@ -3185,7 +3185,6 @@ def insert(self, loc, item): """ _self = np.asarray(self) item = self._coerce_scalar_to_index(item)._values - idx = np.concatenate((_self[:loc], item, _self[loc:])) return self._shallow_copy_with_infer(idx) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 5229936bd8a04..f77076e54f34d 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2349,6 +2349,11 @@ def f(values, freq=None, tz=None): return 
DatetimeIndex._simple_new(values, None, freq=freq, tz=tz) return f + elif klass == PeriodIndex: + def f(values, freq=None, tz=None): + return PeriodIndex._simple_new(values, None, freq=freq) + return f + return klass def validate_read(self, kwargs): @@ -2450,7 +2455,9 @@ def write_index(self, key, index): setattr(self.attrs, '%s_variety' % key, 'regular') converted = _convert_index(index, self.encoding, self.format_type).set_name('index') + self.write_array(key, converted.values) + node = getattr(self.group, key) node._v_attrs.kind = converted.kind node._v_attrs.name = index.name @@ -2552,12 +2559,12 @@ def read_index_node(self, node, start=None, stop=None): kwargs['tz'] = node._v_attrs['tz'] if kind in (u('date'), u('datetime')): - index = factory( - _unconvert_index(data, kind, encoding=self.encoding), - dtype=object, **kwargs) + index = factory(_unconvert_index(data, kind, + encoding=self.encoding), + dtype=object, **kwargs) else: - index = factory( - _unconvert_index(data, kind, encoding=self.encoding), **kwargs) + index = factory(_unconvert_index(data, kind, + encoding=self.encoding), **kwargs) index.name = name @@ -4377,9 +4384,10 @@ def _convert_index(index, encoding=None, format_type=None): index_name=index_name) elif isinstance(index, (Int64Index, PeriodIndex)): atom = _tables().Int64Col() - return IndexCol( - index.values, 'integer', atom, freq=getattr(index, 'freq', None), - index_name=index_name) + # avoid to store ndarray of Period objects + return IndexCol(index._values, 'integer', atom, + freq=getattr(index, 'freq', None), + index_name=index_name) if isinstance(index, MultiIndex): raise TypeError('MultiIndex not supported here!') diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 59b98ebcff82a..f7e8a4e858441 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -245,9 +245,18 @@ def test_ensure_copied_data(self): tm.assert_numpy_array_equal(index.values, result.values, check_same='copy') - result = index_type(index.values, copy=False, **init_kwargs) - tm.assert_numpy_array_equal(index.values, result.values, - check_same='same') + if not isinstance(index, PeriodIndex): + result = index_type(index.values, copy=False, **init_kwargs) + tm.assert_numpy_array_equal(index.values, result.values, + check_same='same') + tm.assert_numpy_array_equal(index._values, result._values, + check_same='same') + else: + # .values an object array of Period, thus copied + result = index_type(ordinal=index.asi8, copy=False, + **init_kwargs) + tm.assert_numpy_array_equal(index._values, result._values, + check_same='same') def test_copy_and_deepcopy(self): from copy import copy, deepcopy diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index bcc6532fbe0ce..7502a4ce26b04 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -781,7 +781,7 @@ def test_astype(self): idx = period_range('1990', '2009', freq='A') result = idx.astype('i8') self.assert_index_equal(result, Index(idx.asi8)) - self.assert_numpy_array_equal(result.values, idx.values) + self.assert_numpy_array_equal(result.values, idx.asi8) def test_astype_raises(self): # GH 13149, GH 13209 diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index d8d8242fa50c6..5fbaea6c5efcb 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -490,16 +490,30 @@ def test_insert_index_period(self): 
self._assert_insert_conversion(obj, pd.Period('2012-01', freq='M'), exp, 'period[M]') - # ToDo: must coerce to object? - exp = pd.PeriodIndex(['2011-01', '2012-01', '2011-02', - '2011-03', '2011-04'], freq='M') + # period + datetime64 => object + exp = pd.Index([pd.Period('2011-01', freq='M'), + pd.Timestamp('2012-01-01'), + pd.Period('2011-02', freq='M'), + pd.Period('2011-03', freq='M'), + pd.Period('2011-04', freq='M')], freq='M') self._assert_insert_conversion(obj, pd.Timestamp('2012-01-01'), - exp, 'period[M]') + exp, np.object) # period + int => object - msg = "Given date string not likely a datetime." - with tm.assertRaisesRegexp(ValueError, msg): - print(obj.insert(1, 1)) + exp = pd.Index([pd.Period('2011-01', freq='M'), + 1, + pd.Period('2011-02', freq='M'), + pd.Period('2011-03', freq='M'), + pd.Period('2011-04', freq='M')], freq='M') + self._assert_insert_conversion(obj, 1, exp, np.object) + + # period + object => object + exp = pd.Index([pd.Period('2011-01', freq='M'), + 'x', + pd.Period('2011-02', freq='M'), + pd.Period('2011-03', freq='M'), + pd.Period('2011-04', freq='M')], freq='M') + self._assert_insert_conversion(obj, 'x', exp, np.object) class TestWhereCoercion(CoercionBase, tm.TestCase): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index b051b92e15540..e0d63d5aa0c44 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -4137,8 +4137,8 @@ def test_series_partial_set_period(self): idx = pd.period_range('2011-01-01', '2011-01-02', freq='D', name='idx') ser = Series([0.1, 0.2], index=idx, name='s') - result = ser.loc[[pd.Period('2011-01-01', freq='D'), pd.Period( - '2011-01-02', freq='D')]] + result = ser.loc[[pd.Period('2011-01-01', freq='D'), + pd.Period('2011-01-02', freq='D')]] exp = Series([0.1, 0.2], index=idx, name='s') tm.assert_series_equal(result, exp, check_index_type=True) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 52cd65af42c5e..66216758ca091 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -393,7 +393,7 @@ def test_ops(self): if not isinstance(o, PeriodIndex): expected = getattr(o.values, op)() else: - expected = pd.Period(ordinal=getattr(o.values, op)(), + expected = pd.Period(ordinal=getattr(o._values, op)(), freq=o.freq) try: self.assertEqual(result, expected) diff --git a/pandas/tseries/base.py b/pandas/tseries/base.py index ad774d1b92202..e64a0d2ebaf5e 100644 --- a/pandas/tseries/base.py +++ b/pandas/tseries/base.py @@ -323,7 +323,7 @@ def sort_values(self, return_indexer=False, ascending=True): sorted_index = self.take(_as) return sorted_index, _as else: - sorted_values = np.sort(self.values) + sorted_values = np.sort(self._values) attribs = self._get_attributes_dict() freq = attribs['freq'] diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index a23e8af3e610c..8f8519a498a31 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -141,11 +141,11 @@ def convert(values, units, axis): is_float(values)): return get_datevalue(values, axis.freq) if isinstance(values, PeriodIndex): - return values.asfreq(axis.freq).values + return values.asfreq(axis.freq)._values if isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) if is_period_arraylike(values): - return PeriodIndex(values, freq=axis.freq).values + return PeriodIndex(values, freq=axis.freq)._values if isinstance(values, (list, tuple, np.ndarray, Index)): return [get_datevalue(x, 
axis.freq) for x in values] return values @@ -518,7 +518,7 @@ def _daily_finder(vmin, vmax, freq): info = np.zeros(span, dtype=[('val', np.int64), ('maj', bool), ('min', bool), ('fmt', '|S20')]) - info['val'][:] = dates_.values + info['val'][:] = dates_._values info['fmt'][:] = '' info['maj'][[0, -1]] = True # .. and set some shortcuts diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 9b2fa705df385..8bce01b0759fc 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -35,11 +35,11 @@ _quarter_to_myear) from pandas.core.base import _shared_docs -from pandas.indexes.base import _index_shared_docs +from pandas.indexes.base import _index_shared_docs, _ensure_index from pandas import compat from pandas.util.decorators import Appender, cache_readonly, Substitution -from pandas.lib import Timedelta +from pandas.lib import infer_dtype import pandas.tslib as tslib from pandas.compat import zip, u @@ -47,7 +47,7 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) - return get_period_field_arr(alias, self.values, base) + return get_period_field_arr(alias, self._values, base) f.__name__ = name f.__doc__ = docstring return property(f) @@ -73,7 +73,7 @@ def _period_index_cmp(opname, nat_result=False): def wrapper(self, other): if isinstance(other, Period): - func = getattr(self.values, opname) + func = getattr(self._values, opname) other_base, _ = _gfc(other.freq) if other.freq != self.freq: msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) @@ -85,7 +85,7 @@ def wrapper(self, other): msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - result = getattr(self.values, opname)(other.values) + result = getattr(self._values, opname)(other._values) mask = self._isnan | other._isnan if mask.any(): @@ -93,11 +93,11 @@ def wrapper(self, other): return result elif other is tslib.NaT: - result = np.empty(len(self.values), dtype=bool) + result = np.empty(len(self._values), dtype=bool) result.fill(nat_result) else: other = Period(other, freq=self.freq) - func = getattr(self.values, opname) + func = getattr(self._values, opname) result = func(other.ordinal) if self.hasnans: @@ -265,13 +265,17 @@ def _from_arraylike(cls, data, freq, tz): if isinstance(data, PeriodIndex): if freq is None or freq == data.freq: freq = data.freq - data = data.values + data = data._values else: base1, _ = _gfc(data.freq) base2, _ = _gfc(freq) - data = period.period_asfreq_arr(data.values, + data = period.period_asfreq_arr(data._values, base1, base2, 1) else: + if is_object_dtype(data): + inferred = infer_dtype(data) + if inferred == 'integer': + data = data.astype(np.int64) if freq is None and is_object_dtype(data): # must contain Period instance and thus extract ordinals @@ -286,11 +290,8 @@ def _from_arraylike(cls, data, freq, tz): if np.issubdtype(data.dtype, np.datetime64): data = dt64arr_to_periodarr(data, freq, tz) else: - try: - data = _ensure_int64(data) - except (TypeError, ValueError): - data = _ensure_object(data) - data = period.extract_ordinals(data, freq) + data = _ensure_object(data) + data = period.extract_ordinals(data, freq) return data, freq @@ -349,6 +350,29 @@ def __contains__(self, key): return False return False + @property + def asi8(self): + return self._values.view('i8') + + @property + def _int64index(self): + # do not cache, same as .asi8 + return Int64Index(self.asi8, name=self.name, fastpath=True) + + @property + def values(self): + return self.asobject.values + + 
@property + def _values(self): + return self._data + + def __array__(self, dtype=None): + if is_integer_dtype(dtype): + return self.asi8 + else: + return self.asobject.values + def __array_wrap__(self, result, context=None): """ Gets called after a ufunc. Needs additional handling as @@ -359,15 +383,17 @@ def __array_wrap__(self, result, context=None): if isinstance(context, tuple) and len(context) > 0: func = context[0] if (func is np.add): - try: - return self._add_delta(context[1][1]) - except IncompatibleFrequency: - raise TypeError + pass elif (func is np.subtract): - try: - return self._add_delta(-context[1][1]) - except IncompatibleFrequency: - raise TypeError + name = self.name + left = context[1][0] + right = context[1][1] + if (isinstance(left, PeriodIndex) and + isinstance(right, PeriodIndex)): + name = left.name if left.name == right.name else None + return Index(result, name=name) + elif isinstance(left, Period) or isinstance(right, Period): + return Index(result, name=name) elif isinstance(func, np.ufunc): if 'M->M' not in func.types: msg = "ufunc '{0}' not supported for the PeriodIndex" @@ -377,7 +403,9 @@ def __array_wrap__(self, result, context=None): if is_bool_dtype(result): return result - return self._shallow_copy(result) + # the result is object dtype array of Period + # cannot pass _simple_new as it is + return PeriodIndex(result, freq=self.freq, name=self.name) @property def _box_func(self): @@ -393,11 +421,6 @@ def _to_embed(self, keep_tz=False): def _formatter_func(self): return lambda x: "'%s'" % x - @property - def _int64index(self): - # do not cache, same as .asi8 - return Int64Index(self.asi8, name=self.name, fastpath=True) - def asof_locs(self, where, mask): """ where : array of timestamps @@ -408,13 +431,13 @@ def asof_locs(self, where, mask): if isinstance(where_idx, DatetimeIndex): where_idx = PeriodIndex(where_idx.values, freq=self.freq) - locs = self.values[mask].searchsorted(where_idx.values, side='right') + locs = self._values[mask].searchsorted(where_idx._values, side='right') locs = np.where(locs > 0, locs - 1, 0) result = np.arange(len(self))[mask].take(locs) first = mask.argmax() - result[(locs == 0) & (where_idx.values < self.values[first])] = -1 + result[(locs == 0) & (where_idx._values < self._values[first])] = -1 return result @@ -424,8 +447,10 @@ def astype(self, dtype, copy=True, how='start'): if is_object_dtype(dtype): return self.asobject elif is_integer_dtype(dtype): - return Index(self.values.astype('i8', copy=copy), name=self.name, - dtype='i8') + if copy: + return self._int64index.copy() + else: + return self._int64index elif is_datetime64_dtype(dtype): return self.to_timestamp(how=how) elif is_datetime64tz_dtype(dtype): @@ -445,7 +470,7 @@ def searchsorted(self, key, side='left', sorter=None): elif isinstance(key, compat.string_types): key = Period(key, freq=self.freq).ordinal - return self.values.searchsorted(key, side=side, sorter=sorter) + return self._values.searchsorted(key, side=side, sorter=sorter) @property def is_all_dates(self): @@ -570,8 +595,7 @@ def equals(self, other): if self.is_(other): return True - if (not hasattr(other, 'inferred_type') or - other.inferred_type != 'int64'): + if not isinstance(other, PeriodIndex): try: other = PeriodIndex(other) except: @@ -605,12 +629,11 @@ def to_timestamp(self, freq=None, how='start'): base, mult = _gfc(freq) new_data = self.asfreq(freq, how) - new_data = period.periodarr_to_dt64arr(new_data.values, base) + new_data = period.periodarr_to_dt64arr(new_data._values, base) return 
DatetimeIndex(new_data, freq='infer', name=self.name) def _maybe_convert_timedelta(self, other): - if isinstance(other, (timedelta, np.timedelta64, - offsets.Tick, Timedelta)): + if isinstance(other, (timedelta, np.timedelta64, offsets.Tick)): offset = frequencies.to_offset(self.freq.rule_code) if isinstance(offset, offsets.Tick): nanos = tslib._delta_to_nanoseconds(other) @@ -681,7 +704,7 @@ def shift(self, n): ------- shifted : PeriodIndex """ - values = self.values + n * self.freq.n + values = self._values + n * self.freq.n if self.hasnans: values[self._isnan] = tslib.iNaT return PeriodIndex(data=values, name=self.name, freq=self.freq) @@ -712,7 +735,7 @@ def get_value(self, series, key): grp = frequencies.Resolution.get_freq_group(reso) freqn = frequencies.get_freq_group(self.freq) - vals = self.values + vals = self._values # if our data is higher resolution than requested key, slice if grp < freqn: @@ -723,7 +746,7 @@ def get_value(self, series, key): if ord2 < vals[0] or ord1 > vals[-1]: raise KeyError(key) - pos = np.searchsorted(self.values, [ord1, ord2]) + pos = np.searchsorted(self._values, [ord1, ord2]) key = slice(pos[0], pos[1] + 1) return series[key] elif grp == freqn: @@ -740,10 +763,19 @@ def get_value(self, series, key): series, key) def get_indexer(self, target, method=None, limit=None, tolerance=None): + target = _ensure_index(target) + if hasattr(target, 'freq') and target.freq != self.freq: msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, target.freqstr) raise IncompatibleFrequency(msg) - return Index.get_indexer(self, target, method, limit, tolerance) + + if isinstance(target, PeriodIndex): + target = target.asi8 + + if tolerance is not None: + tolerance = self._convert_tolerance(tolerance) + return Index.get_indexer(self._int64index, target, method, + limit, tolerance) def get_loc(self, key, method=None, tolerance=None): """ @@ -862,6 +894,14 @@ def _convert_tolerance(self, tolerance): tolerance = DatetimeIndexOpsMixin._convert_tolerance(self, tolerance) return self._maybe_convert_timedelta(tolerance) + def insert(self, loc, item): + if not isinstance(item, Period) or self.freq != item.freq: + return self.asobject.insert(loc, item) + + idx = np.concatenate((self[:loc].asi8, np.array([item.ordinal]), + self[loc:].asi8)) + return self._shallow_copy(idx) + def join(self, other, how='left', level=None, return_indexers=False): """ See Index.join @@ -949,10 +989,10 @@ def append(self, other): # box to_concat = [x.asobject.values for x in to_concat] else: - cat_values = np.concatenate([x.values for x in to_concat]) + cat_values = np.concatenate([x._values for x in to_concat]) return PeriodIndex(cat_values, freq=self.freq, name=name) - to_concat = [x.values if isinstance(x, Index) else x + to_concat = [x._values if isinstance(x, Index) else x for x in to_concat] return Index(com._concat_compat(to_concat), name=name) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 38c2e009a01f3..5c4bfe5360fac 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -784,7 +784,7 @@ def _get_new_index(self): else: start = ax[0].asfreq(self.freq, how=self.convention) end = ax[-1].asfreq(self.freq, how='end') - values = period_range(start, end, freq=self.freq).values + values = period_range(start, end, freq=self.freq).asi8 return ax._shallow_copy(values, freq=self.freq) @@ -815,7 +815,8 @@ def _downsample(self, how, **kwargs): if len(new_index) == 0: bins = [] else: - rng = np.arange(memb.values[0], memb.values[-1] + 1) + i8 = memb.asi8 + rng = 
np.arange(i8[0], i8[-1] + 1) bins = memb.searchsorted(rng, side='right') grouper = BinGrouper(bins, new_index) return self._groupby_and_aggregate(how, grouper=grouper) diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 0d6c991f00c8b..4d3c60ce39291 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -2364,8 +2364,8 @@ def _check_freq(index, expected_index): freq='D') result = pidx.sort_values() - expected = PeriodIndex( - ['NaT', '2011', '2011', '2013'], name='pidx', freq='D') + expected = PeriodIndex(['NaT', '2011', '2011', '2013'], + name='pidx', freq='D') self.assert_index_equal(result, expected) self.assertEqual(result.freq, 'D') diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index fe6dcf69e0b4e..1ddcc11c15a59 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1747,8 +1747,12 @@ def test_constructor_corner(self): def test_constructor_fromarraylike(self): idx = period_range('2007-01', periods=20, freq='M') - self.assertRaises(ValueError, PeriodIndex, idx.values) - self.assertRaises(ValueError, PeriodIndex, list(idx.values)) + # values is an array of Period, thus can retrieve freq + tm.assert_index_equal(PeriodIndex(idx.values), idx) + tm.assert_index_equal(PeriodIndex(list(idx.values)), idx) + + self.assertRaises(ValueError, PeriodIndex, idx._values) + self.assertRaises(ValueError, PeriodIndex, list(idx._values)) self.assertRaises(ValueError, PeriodIndex, data=Period('2007', freq='A')) @@ -2027,26 +2031,29 @@ def test_view_asi8(self): tm.assert_numpy_array_equal(idx.asi8, exp) def test_values(self): - # ToDo: .values and .get_values() should return Period as object - # dtype array. ._values shouldn't be changed idx = pd.PeriodIndex([], freq='M') - exp = np.array([], dtype=np.int64) + exp = np.array([], dtype=np.object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) + exp = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(idx._values, exp) idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') - exp = np.array([492, -9223372036854775808], dtype=np.int64) + exp = np.array([pd.Period('2011-01', freq='M'), pd.NaT], dtype=object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) + exp = np.array([492, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx._values, exp) - exp = np.array([14975, -9223372036854775808], dtype=np.int64) idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') + + exp = np.array([pd.Period('2011-01-01', freq='D'), pd.NaT], + dtype=object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) + exp = np.array([14975, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx._values, exp) def test_asobject_like(self): @@ -2100,7 +2107,7 @@ def test_getitem_ndim2(self): result = idx[:, None] # MPL kludge, internally has incorrect shape tm.assertIsInstance(result, PeriodIndex) - self.assertEqual(result.shape, (len(idx), 1)) + self.assertEqual(result.shape, (len(idx), )) def test_getitem_index(self): idx = period_range('2007-01', periods=10, freq='M', name='x') @@ -4153,19 +4160,23 @@ def test_pi_ops_errors(self): with tm.assertRaisesRegexp(TypeError, msg): obj - ng - # ToDo: currently, it accepts float because PeriodIndex.values - # is internally int. 
Should be fixed after GH13988 - # msg is different depending on NumPy version - if not _np_version_under1p9: - for ng in ["str"]: - with tm.assertRaises(TypeError): - np.add(obj, ng) + with tm.assertRaises(TypeError): + np.add(obj, ng) + if _np_version_under1p9: + self.assertIs(np.add(ng, obj), NotImplemented) + else: with tm.assertRaises(TypeError): np.add(ng, obj) + with tm.assertRaises(TypeError): + np.subtract(obj, ng) + + if _np_version_under1p9: + self.assertIs(np.subtract(ng, obj), NotImplemented) + else: with tm.assertRaises(TypeError): - np.subtract(ng, obj) + np.subtract(ng, obj) def test_pi_ops_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', @@ -4260,10 +4271,19 @@ def test_pi_sub_period(self): exp = pd.Index([-12, -11, -10, -9], name='idx') tm.assert_index_equal(result, exp) + result = np.subtract(idx, pd.Period('2012-01', freq='M')) + tm.assert_index_equal(result, exp) + result = pd.Period('2012-01', freq='M') - idx exp = pd.Index([12, 11, 10, 9], name='idx') tm.assert_index_equal(result, exp) + result = np.subtract(pd.Period('2012-01', freq='M'), idx) + if _np_version_under1p9: + self.assertIs(result, NotImplemented) + else: + tm.assert_index_equal(result, exp) + exp = pd.TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name='idx') tm.assert_index_equal(idx - pd.Period('NaT', freq='M'), exp) tm.assert_index_equal(pd.Period('NaT', freq='M') - idx, exp) @@ -4407,7 +4427,8 @@ def test_nanosecondly(self): def _check_freq(self, freq, base_date): rng = PeriodIndex(start=base_date, periods=10, freq=freq) exp = np.arange(10, dtype=np.int64) - self.assert_numpy_array_equal(rng.values, exp) + self.assert_numpy_array_equal(rng._values, exp) + self.assert_numpy_array_equal(rng.asi8, exp) def test_negone_ordinals(self): freqs = ['A', 'M', 'Q', 'D', 'H', 'T', 'S']
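
The user-facing surface of this change can be exercised with a short snippet. The following is a minimal sketch, assuming the post-patch semantics described in the whatsnew entry above (``.values`` returning ``Period`` objects while the ``int64`` ordinals stay reachable through ``.asi8``); the expected results in the comments are taken from the tests added in this patch, not from additional API guarantees.

.. code-block:: python

    import pandas as pd

    pi = pd.PeriodIndex(['2011-01', '2011-02'], freq='M')

    # .values (and .get_values()) now return an object-dtype array of Period
    pi.values         # array([Period('2011-01', 'M'), Period('2011-02', 'M')], dtype=object)

    # the int64 ordinals remain available through .asi8
    pi.asi8           # array([492, 493])

    # astype to integer now goes through the Int64Index of ordinals
    pi.astype('i8')   # Int64Index([492, 493], dtype='int64')

    # because .values now carries Period objects (and therefore the freq),
    # round-tripping through the constructor works again
    pd.PeriodIndex(pi.values).equals(pi)   # True

    # inserting a non-Period item (or a Period with a different freq)
    # falls back to an object Index instead of raising
    pi.insert(1, 'x')  # Index([Period('2011-01', 'M'), 'x', Period('2011-02', 'M')], dtype='object')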
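Arithmetic through NumPy ufuncs is also affected by the new ``__array_wrap__`` handling. Below is a sketch of the intended behavior, again with expected results taken from the tests in this patch; per those tests, the reflected ``np.subtract(Period, PeriodIndex)`` case only produces an ``Index`` on NumPy >= 1.9, and returns ``NotImplemented`` on older NumPy.

.. code-block:: python

    import numpy as np
    import pandas as pd

    idx = pd.PeriodIndex(['2011-01', '2011-02', '2011-03', '2011-04'],
                         freq='M', name='idx')
    p = pd.Period('2012-01', freq='M')

    # PeriodIndex - Period returns a plain Index of offsets (in units of freq)
    idx - p              # Index([-12, -11, -10, -9], name='idx')

    # the subtraction ufunc is routed through __array_wrap__ as well
    np.subtract(idx, p)  # Index([-12, -11, -10, -9], name='idx')
    np.subtract(p, idx)  # Index([12, 11, 10, 9], name='idx') on NumPy >= 1.9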