diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index c41da4d67afe5..5c64b0a55c09b 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -100,7 +100,7 @@ Conversion Indexing ^^^^^^^^ -- +- Bug in :func:`PeriodIndex.truncate` which raises ``TypeError`` when ``PeriodIndex`` is monotonic (:issue:`17717`) - - diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index e98c0131e9c44..78eb7b3ae483e 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -17,7 +17,7 @@ from tslib cimport _to_i8 from hashtable cimport HashTable -from pandas._libs import algos, hashtable as _hash +from pandas._libs import algos, period as periodlib, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta from datetime import datetime, timedelta @@ -270,13 +270,16 @@ cdef class IndexEngine: values = self._get_index_values() self.mapping = self._make_hash_table(len(values)) - self.mapping.map_locations(values) + self._call_map_locations(values) if len(self.mapping) == len(values): self.unique = 1 self.need_unique_check = 0 + cpdef _call_map_locations(self, values): + self.mapping.map_locations(values) + def clear_mapping(self): self.mapping = None self.need_monotonic_check = 1 @@ -490,6 +493,53 @@ cdef class TimedeltaEngine(DatetimeEngine): cdef _get_box_dtype(self): return 'm8[ns]' + +cdef class PeriodEngine(Int64Engine): + + cdef _get_index_values(self): + return super(PeriodEngine, self).vgetter() + + cpdef _call_map_locations(self, values): + super(PeriodEngine, self)._call_map_locations(values.view('i8')) + + def _call_monotonic(self, values): + return super(PeriodEngine, self)._call_monotonic(values.view('i8')) + + def get_indexer(self, values): + cdef ndarray[int64_t, ndim=1] ordinals + + super(PeriodEngine, self)._ensure_mapping_populated() + + freq = super(PeriodEngine, self).vgetter().freq + ordinals = periodlib.extract_ordinals(values, freq) + + return self.mapping.lookup(ordinals) + + def get_pad_indexer(self, other, limit=None): + freq = super(PeriodEngine, self).vgetter().freq + ordinal = periodlib.extract_ordinals(other, freq) + + return algos.pad_int64(self._get_index_values(), + np.asarray(ordinal), limit=limit) + + def get_backfill_indexer(self, other, limit=None): + freq = super(PeriodEngine, self).vgetter().freq + ordinal = periodlib.extract_ordinals(other, freq) + + return algos.backfill_int64(self._get_index_values(), + np.asarray(ordinal), limit=limit) + + def get_indexer_non_unique(self, targets): + freq = super(PeriodEngine, self).vgetter().freq + ordinal = periodlib.extract_ordinals(targets, freq) + ordinal_array = np.asarray(ordinal) + + return super(PeriodEngine, self).get_indexer_non_unique(ordinal_array) + + cdef _get_index_values_for_bool_indexer(self): + return self._get_index_values().view('i8') + + cpdef convert_scalar(ndarray arr, object value): # we don't turn integers # into datetimes/timedeltas diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index 76c0deef7ebee..b9fc0ddd7ea1c 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -66,7 +66,7 @@ cdef class {{name}}Engine(IndexEngine): raise KeyError(val) {{endif}} - values = self._get_index_values() + values = self._get_index_values_for_bool_indexer() n = len(values) result = np.empty(n, dtype=bool) @@ -86,6 +86,9 @@ cdef class {{name}}Engine(IndexEngine): return last_true return result + + cdef _get_index_values_for_bool_indexer(self): + return self._get_index_values() {{endif}} {{endfor}} diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 148ca2725fbdc..c4938b556c8dd 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -31,7 +31,7 @@ import pandas.tseries.offsets as offsets from pandas._libs.lib import infer_dtype -from pandas._libs import tslib, period +from pandas._libs import tslib, period, index as libindex from pandas._libs.period import (Period, IncompatibleFrequency, get_period_field_arr, _validate_end_alias, _quarter_to_myear) @@ -192,6 +192,8 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): freq = None + _engine_type = libindex.PeriodEngine + __eq__ = _period_index_cmp('__eq__') __ne__ = _period_index_cmp('__ne__', nat_result=True) __lt__ = _period_index_cmp('__lt__') @@ -275,6 +277,10 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, data = period.extract_ordinals(data, freq) return cls._from_ordinals(data, name=name, freq=freq) + @cache_readonly + def _engine(self): + return self._engine_type(lambda: self, len(self)) + @classmethod def _generate_range(cls, start, end, periods, freq, fields): if freq is not None: diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index efc13a56cd77e..d99eba3e2d5e9 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -6,7 +6,7 @@ import pandas as pd from pandas.util import testing as tm from pandas.compat import lrange -from pandas._libs import tslib +from pandas._libs import tslib, tslibs from pandas import (PeriodIndex, Series, DatetimeIndex, period_range, Period) @@ -310,3 +310,197 @@ def test_take_fill_value(self): with pytest.raises(IndexError): idx.take(np.array([1, -5])) + + def test_get_loc(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + # get the location of p1/p2 from + # monotonic increasing PeriodIndex with non-duplicate + idx0 = pd.PeriodIndex([p0, p1, p2]) + expected_idx1_p1 = 1 + expected_idx1_p2 = 2 + + assert idx0.get_loc(p1) == expected_idx1_p1 + assert idx0.get_loc(str(p1)) == expected_idx1_p1 + assert idx0.get_loc(p2) == expected_idx1_p2 + assert idx0.get_loc(str(p2)) == expected_idx1_p2 + + pytest.raises(tslibs.parsing.DateParseError, idx0.get_loc, 'foo') + pytest.raises(KeyError, idx0.get_loc, 1.1) + pytest.raises(TypeError, idx0.get_loc, idx0) + + # get the location of p1/p2 from + # monotonic increasing PeriodIndex with duplicate + idx1 = pd.PeriodIndex([p1, p1, p2]) + expected_idx1_p1 = slice(0, 2) + expected_idx1_p2 = 2 + + assert idx1.get_loc(p1) == expected_idx1_p1 + assert idx1.get_loc(str(p1)) == expected_idx1_p1 + assert idx1.get_loc(p2) == expected_idx1_p2 + assert idx1.get_loc(str(p2)) == expected_idx1_p2 + + pytest.raises(tslibs.parsing.DateParseError, idx1.get_loc, 'foo') + pytest.raises(KeyError, idx1.get_loc, 1.1) + pytest.raises(TypeError, idx1.get_loc, idx1) + + # get the location of p1/p2 from + # non-monotonic increasing/decreasing PeriodIndex with duplicate + idx2 = pd.PeriodIndex([p2, p1, p2]) + expected_idx2_p1 = 1 + expected_idx2_p2 = np.array([True, False, True]) + + assert idx2.get_loc(p1) == expected_idx2_p1 + assert idx2.get_loc(str(p1)) == expected_idx2_p1 + tm.assert_numpy_array_equal(idx2.get_loc(p2), expected_idx2_p2) + tm.assert_numpy_array_equal(idx2.get_loc(str(p2)), expected_idx2_p2) + + def test_is_monotonic_increasing(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + idx_inc0 = pd.PeriodIndex([p0, p1, p2]) + idx_inc1 = pd.PeriodIndex([p0, p1, p1]) + idx_dec0 = pd.PeriodIndex([p2, p1, p0]) + idx_dec1 = pd.PeriodIndex([p2, p1, p1]) + idx = pd.PeriodIndex([p1, p2, p0]) + + assert idx_inc0.is_monotonic_increasing + assert idx_inc1.is_monotonic_increasing + assert not idx_dec0.is_monotonic_increasing + assert not idx_dec1.is_monotonic_increasing + assert not idx.is_monotonic_increasing + + def test_is_monotonic_decreasing(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + idx_inc0 = pd.PeriodIndex([p0, p1, p2]) + idx_inc1 = pd.PeriodIndex([p0, p1, p1]) + idx_dec0 = pd.PeriodIndex([p2, p1, p0]) + idx_dec1 = pd.PeriodIndex([p2, p1, p1]) + idx = pd.PeriodIndex([p1, p2, p0]) + + assert not idx_inc0.is_monotonic_decreasing + assert not idx_inc1.is_monotonic_decreasing + assert idx_dec0.is_monotonic_decreasing + assert idx_dec1.is_monotonic_decreasing + assert not idx.is_monotonic_decreasing + + def test_is_unique(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + idx0 = pd.PeriodIndex([p0, p1, p2]) + assert idx0.is_unique + + idx1 = pd.PeriodIndex([p1, p1, p2]) + assert not idx1.is_unique + + def test_contains(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + p3 = pd.Period('2017-09-04') + + ps0 = [p0, p1, p2] + idx0 = pd.PeriodIndex(ps0) + + for p in ps0: + assert idx0.contains(p) + assert p in idx0 + + assert idx0.contains(str(p)) + assert str(p) in idx0 + + assert idx0.contains('2017-09-01 00:00:01') + assert '2017-09-01 00:00:01' in idx0 + + assert idx0.contains('2017-09') + assert '2017-09' in idx0 + + assert not idx0.contains(p3) + assert p3 not in idx0 + + def test_get_value(self): + # GH 17717 + p0 = pd.Period('2017-09-01') + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + + idx0 = pd.PeriodIndex([p0, p1, p2]) + input0 = np.array([1, 2, 3]) + expected0 = 2 + + result0 = idx0.get_value(input0, p1) + assert result0 == expected0 + + idx1 = pd.PeriodIndex([p1, p1, p2]) + input1 = np.array([1, 2, 3]) + expected1 = np.array([1, 2]) + + result1 = idx1.get_value(input1, p1) + tm.assert_numpy_array_equal(result1, expected1) + + idx2 = pd.PeriodIndex([p1, p2, p1]) + input2 = np.array([1, 2, 3]) + expected2 = np.array([1, 3]) + + result2 = idx2.get_value(input2, p1) + tm.assert_numpy_array_equal(result2, expected2) + + def test_get_indexer(self): + # GH 17717 + p1 = pd.Period('2017-09-01') + p2 = pd.Period('2017-09-04') + p3 = pd.Period('2017-09-07') + + tp0 = pd.Period('2017-08-31') + tp1 = pd.Period('2017-09-02') + tp2 = pd.Period('2017-09-05') + tp3 = pd.Period('2017-09-09') + + idx = pd.PeriodIndex([p1, p2, p3]) + + tm.assert_numpy_array_equal(idx.get_indexer(idx), + np.array([0, 1, 2], dtype=np.intp)) + + target = pd.PeriodIndex([tp0, tp1, tp2, tp3]) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'pad'), + np.array([-1, 0, 1, 2], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'backfill'), + np.array([0, 1, 2, -1], dtype=np.intp)) + tm.assert_numpy_array_equal(idx.get_indexer(target, 'nearest'), + np.array([0, 0, 1, 2], dtype=np.intp)) + + res = idx.get_indexer(target, 'nearest', + tolerance=pd.Timedelta('1 day')) + tm.assert_numpy_array_equal(res, + np.array([0, 0, 1, -1], dtype=np.intp)) + + def test_get_indexer_non_unique(self): + # GH 17717 + p1 = pd.Period('2017-09-02') + p2 = pd.Period('2017-09-03') + p3 = pd.Period('2017-09-04') + p4 = pd.Period('2017-09-05') + + idx1 = pd.PeriodIndex([p1, p2, p1]) + idx2 = pd.PeriodIndex([p2, p1, p3, p4]) + + result = idx1.get_indexer_non_unique(idx2) + expected_indexer = np.array([1, 0, 2, -1, -1], dtype=np.int64) + expected_missing = np.array([2, 3], dtype=np.int64) + + tm.assert_numpy_array_equal(result[0], expected_indexer) + tm.assert_numpy_array_equal(result[1], expected_missing) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index e907b0edd5c6a..b4ff25d2630b8 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -249,3 +249,33 @@ def test_align_series(self): msg = "Input has different freq=D from PeriodIndex\\(freq=A-DEC\\)" with tm.assert_raises_regex(period.IncompatibleFrequency, msg): ts + ts.asfreq('D', how="end") + + def test_truncate(self): + # GH 17717 + idx1 = pd.PeriodIndex([ + pd.Period('2017-09-02'), + pd.Period('2017-09-02'), + pd.Period('2017-09-03') + ]) + series1 = pd.Series([1, 2, 3], index=idx1) + result1 = series1.truncate(after='2017-09-02') + + expected_idx1 = pd.PeriodIndex([ + pd.Period('2017-09-02'), + pd.Period('2017-09-02') + ]) + tm.assert_series_equal(result1, pd.Series([1, 2], index=expected_idx1)) + + idx2 = pd.PeriodIndex([ + pd.Period('2017-09-03'), + pd.Period('2017-09-02'), + pd.Period('2017-09-03') + ]) + series2 = pd.Series([1, 2, 3], index=idx2) + result2 = series2.truncate(after='2017-09-02') + + expected_idx2 = pd.PeriodIndex([ + pd.Period('2017-09-03'), + pd.Period('2017-09-02') + ]) + tm.assert_series_equal(result2, pd.Series([1, 2], index=expected_idx2))