From 4dc5dcba5c9804407a35c63f5a5e56283e7c5753 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 12 Jan 2020 08:42:30 -0800 Subject: [PATCH 1/5] REF: simplify PeriodIndex.get_loc, get_value --- pandas/_libs/index.pyx | 2 +- pandas/core/indexes/period.py | 59 +++++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 25 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 28d269a9a809e..a64ccb3c0c81c 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -500,7 +500,7 @@ cdef class TimedeltaEngine(DatetimeEngine): cdef class PeriodEngine(Int64Engine): cdef _get_index_values(self): - return super(PeriodEngine, self).vgetter() + return super(PeriodEngine, self).vgetter().view("i8") cdef void _call_map_locations(self, values): # super(...) pattern doesn't seem to work with `cdef` diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 6ab2e66e05d6e..2f9049b380256 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -4,7 +4,7 @@ import numpy as np from pandas._libs import index as libindex -from pandas._libs.tslibs import NaT, frequencies as libfrequencies, iNaT, resolution +from pandas._libs.tslibs import NaT, frequencies as libfrequencies, resolution from pandas._libs.tslibs.period import Period from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -17,6 +17,7 @@ is_float_dtype, is_integer, is_integer_dtype, + is_list_like, is_object_dtype, pandas_dtype, ) @@ -42,7 +43,6 @@ ) from pandas.core.indexes.datetimes import DatetimeIndex, Index from pandas.core.indexes.numeric import Int64Index -from pandas.core.missing import isna from pandas.core.ops import get_op_result_name from pandas.core.tools.datetimes import DateParseError, parse_time_string @@ -531,16 +531,15 @@ def get_value(self, series, key): key = slice(pos[0], pos[1] + 1) return series[key] elif grp == freqn: - key = Period(asdt, freq=self.freq).ordinal - return com.maybe_box( - self, self._int64index.get_value(s, key), series, key - ) + key = Period(asdt, freq=self.freq) + loc = self.get_loc(key) + return series[loc] else: raise KeyError(key) period = Period(key, self.freq) - key = period.value if isna(period) else period.ordinal - return com.maybe_box(self, self._int64index.get_value(s, key), series, key) + loc = self.get_loc(period) + return series[loc] else: return com.maybe_box(self, value, series, key) @@ -579,36 +578,48 @@ def get_indexer_non_unique(self, target): def get_loc(self, key, method=None, tolerance=None): """ - Get integer location for requested label + Get integer location for requested label. + + Parameters + ---------- + key : Period, NaT, str, or datetime + String or datetime key must be parseable as Period. Returns ------- - loc : int + loc : int or ndarray[int64] + + Raises + ------ + KeyError + Key is not present in the index. + TypeError + If key is listlike or otherwise not hashable. """ - try: - return self._engine.get_loc(key) - except KeyError: - if is_integer(key): - raise + if isinstance(key, str): try: asdt, parsed, reso = parse_time_string(key, self.freq) key = asdt - except TypeError: - pass except DateParseError: # A string with invalid format raise KeyError(f"Cannot interpret '{key}' as period") - try: - key = Period(key, freq=self.freq) - except ValueError: - # we cannot construct the Period - # as we have an invalid type - raise KeyError(key) + try: + key = Period(key, freq=self.freq) + except ValueError: + # we cannot construct the Period + # as we have an invalid type + if is_list_like(key): + raise TypeError(f"'{key}' is an invalid key") + raise KeyError(key) + + ordinal = key.ordinal if key is not NaT else key.value + try: + return self._engine.get_loc(ordinal) + except KeyError: try: - ordinal = iNaT if key is NaT else key.ordinal if tolerance is not None: tolerance = self._convert_tolerance(tolerance, np.asarray(key)) return self._int64index.get_loc(ordinal, method, tolerance) From 45048f6abb115fbb85bfbaa7ff1f9c8bc44987b8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 12 Jan 2020 14:22:47 -0800 Subject: [PATCH 2/5] tests passing --- pandas/core/indexes/period.py | 7 +++++++ pandas/tests/indexes/period/test_indexing.py | 22 ++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 2f9049b380256..af45070529d34 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -507,6 +507,9 @@ def get_value(self, series, key): Fast lookup of value from 1-dimensional ndarray. Only use this if you know what you're doing """ + if is_integer(key): + return series.iat[key] + s = com.values_from_object(series) try: value = super().get_value(s, key) @@ -605,6 +608,10 @@ def get_loc(self, key, method=None, tolerance=None): # A string with invalid format raise KeyError(f"Cannot interpret '{key}' as period") + elif is_integer(key): + # Period constructor will cast to string, which we dont want + raise KeyError(key) + try: key = Period(key, freq=self.freq) except ValueError: diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 7dbefbdaff98e..6bda9756afc86 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -451,6 +451,28 @@ def test_get_loc(self): tm.assert_numpy_array_equal(idx2.get_loc(p2), expected_idx2_p2) tm.assert_numpy_array_equal(idx2.get_loc(str(p2)), expected_idx2_p2) + def test_get_loc_integer(self): + dti = pd.date_range('2016-01-01', periods=3) + pi = dti.to_period("D") + with pytest.raises(KeyError, match="16801"): + pi.get_loc(16801) + + pi2 = dti.to_period("Y") # duplicates, ordinals are all 46 + with pytest.raises(KeyError, match="46"): + pi2.get_loc(46) + + def test_get_value_integer(self): + dti = pd.date_range('2016-01-01', periods=3) + pi = dti.to_period("D") + ser = pd.Series(range(3), index=pi) + with pytest.raises(IndexError, match="is out of bounds for axis 0 with size 3"): + pi.get_value(ser, 16801) + + pi2 = dti.to_period("Y") # duplicates, ordinals are all 46 + ser2 = pd.Series(range(3), index=pi2) + with pytest.raises(IndexError, match="is out of bounds for axis 0 with size 3"): + pi2.get_value(ser2, 46) + def test_is_monotonic_increasing(self): # GH 17717 p0 = pd.Period("2017-09-01") From 2ee7090978631631575c4c7192163ccb71060d99 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 12 Jan 2020 18:42:51 -0800 Subject: [PATCH 3/5] simplify refactor --- pandas/core/indexes/period.py | 62 +++++++++++++++++------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index af45070529d34..cc0c8d87eb445 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -510,42 +510,42 @@ def get_value(self, series, key): if is_integer(key): return series.iat[key] - s = com.values_from_object(series) - try: - value = super().get_value(s, key) - except (KeyError, IndexError): - if isinstance(key, str): - asdt, parsed, reso = parse_time_string(key, self.freq) - grp = resolution.Resolution.get_freq_group(reso) - freqn = resolution.get_freq_group(self.freq) - - vals = self._ndarray_values - - # if our data is higher resolution than requested key, slice - if grp < freqn: - iv = Period(asdt, freq=(grp, 1)) - ord1 = iv.asfreq(self.freq, how="S").ordinal - ord2 = iv.asfreq(self.freq, how="E").ordinal - - if ord2 < vals[0] or ord1 > vals[-1]: - raise KeyError(key) - - pos = np.searchsorted(self._ndarray_values, [ord1, ord2]) - key = slice(pos[0], pos[1] + 1) - return series[key] - elif grp == freqn: - key = Period(asdt, freq=self.freq) - loc = self.get_loc(key) - return series[loc] - else: + if isinstance(key, str): + asdt, parsed, reso = parse_time_string(key, self.freq) + grp = resolution.Resolution.get_freq_group(reso) + freqn = resolution.get_freq_group(self.freq) + + vals = self._ndarray_values + + # if our data is higher resolution than requested key, slice + if grp < freqn: + iv = Period(asdt, freq=(grp, 1)) + ord1 = iv.asfreq(self.freq, how="S").ordinal + ord2 = iv.asfreq(self.freq, how="E").ordinal + + if ord2 < vals[0] or ord1 > vals[-1]: raise KeyError(key) - period = Period(key, self.freq) - loc = self.get_loc(period) + pos = np.searchsorted(self._ndarray_values, [ord1, ord2]) + key = slice(pos[0], pos[1] + 1) + return series[key] + elif grp == freqn: + key = Period(asdt, freq=self.freq) + loc = self.get_loc(key) + return series[loc] + else: + raise KeyError(key) + + elif isinstance(key, Period) or key is NaT: + ordinal = key.ordinal if key is not NaT else NaT.value + loc = self._engine.get_loc(ordinal) return series[loc] - else: + value = Index.get_value(self, series, key) return com.maybe_box(self, value, series, key) + value = Index.get_value(self, series, key) + return com.maybe_box(self, value, series, key) + @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): target = ensure_index(target) From 79e21ec57f3476d2afd6008af8d9a8d266cad75c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Jan 2020 16:27:09 -0800 Subject: [PATCH 4/5] black fixup --- pandas/core/indexes/period.py | 2 -- pandas/tests/indexes/period/test_indexing.py | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 8e75489b50e39..07d5590689b1f 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -540,8 +540,6 @@ def get_value(self, series, key): ordinal = key.ordinal if key is not NaT else NaT.value loc = self._engine.get_loc(ordinal) return series[loc] - value = Index.get_value(self, series, key) - return com.maybe_box(self, value, series, key) value = Index.get_value(self, series, key) return com.maybe_box(self, value, series, key) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 317d4b85b7989..592dccc5fc8ed 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -452,7 +452,7 @@ def test_get_loc(self): tm.assert_numpy_array_equal(idx2.get_loc(str(p2)), expected_idx2_p2) def test_get_loc_integer(self): - dti = pd.date_range('2016-01-01', periods=3) + dti = pd.date_range("2016-01-01", periods=3) pi = dti.to_period("D") with pytest.raises(KeyError, match="16801"): pi.get_loc(16801) @@ -462,7 +462,7 @@ def test_get_loc_integer(self): pi2.get_loc(46) def test_get_value_integer(self): - dti = pd.date_range('2016-01-01', periods=3) + dti = pd.date_range("2016-01-01", periods=3) pi = dti.to_period("D") ser = pd.Series(range(3), index=pi) with pytest.raises(IndexError, match="is out of bounds for axis 0 with size 3"): From 113a1a55387cd4fdd4da01257b863770df382a27 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Jan 2020 17:55:55 -0800 Subject: [PATCH 5/5] comment, use iloc --- pandas/core/indexes/period.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 07d5590689b1f..123353b620bfa 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -532,7 +532,7 @@ def get_value(self, series, key): elif grp == freqn: key = Period(asdt, freq=self.freq) loc = self.get_loc(key) - return series[loc] + return series.iloc[loc] else: raise KeyError(key) @@ -541,6 +541,7 @@ def get_value(self, series, key): loc = self._engine.get_loc(ordinal) return series[loc] + # slice, PeriodIndex, np.ndarray, List[Period] value = Index.get_value(self, series, key) return com.maybe_box(self, value, series, key)