diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index fe6c1ba808f9a..ab6db7780b283 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -1,5 +1,5 @@ from datetime import datetime, timedelta -from typing import Any +from typing import TYPE_CHECKING, Any import weakref import numpy as np @@ -20,6 +20,7 @@ is_integer_dtype, is_list_like, is_object_dtype, + is_scalar, pandas_dtype, ) @@ -33,6 +34,7 @@ import pandas.core.common as com import pandas.core.indexes.base as ibase from pandas.core.indexes.base import ( + InvalidIndexError, _index_shared_docs, ensure_index, maybe_extract_name, @@ -52,6 +54,8 @@ _index_doc_kwargs = dict(ibase._index_doc_kwargs) _index_doc_kwargs.update(dict(target_klass="PeriodIndex or list of Periods")) +if TYPE_CHECKING: + from pandas import Series # --- Period index sketch @@ -474,43 +478,16 @@ def inferred_type(self) -> str: # indexing return "period" - def get_value(self, series, key): + def get_value(self, series: "Series", key): """ Fast lookup of value from 1-dimensional ndarray. Only use this if you know what you're doing """ if is_integer(key): - return series.iat[key] - - if isinstance(key, str): - try: - loc = self._get_string_slice(key) - return series[loc] - except (TypeError, ValueError, OverflowError): - pass - - asdt, reso = parse_time_string(key, self.freq) - grp = resolution.Resolution.get_freq_group(reso) - freqn = resolution.get_freq_group(self.freq) - - # _get_string_slice will handle cases where grp < freqn - assert grp >= freqn - - if grp == freqn: - key = Period(asdt, freq=self.freq) - loc = self.get_loc(key) - return series.iloc[loc] - else: - raise KeyError(key) - - elif isinstance(key, Period) or key is NaT: - ordinal = key.ordinal if key is not NaT else NaT.value - loc = self._engine.get_loc(ordinal) - return series[loc] - - # slice, PeriodIndex, np.ndarray, List[Period] - value = Index.get_value(self, series, key) - return com.maybe_box(self, value, series, key) + loc = key + else: + loc = self.get_loc(key) + return self._get_values_for_loc(series, loc) @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs) def get_indexer(self, target, method=None, limit=None, tolerance=None): @@ -566,6 +543,9 @@ def get_loc(self, key, method=None, tolerance=None): If key is listlike or otherwise not hashable. """ + if not is_scalar(key): + raise InvalidIndexError(key) + if isinstance(key, str): try: diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 4c1438915ab33..38514594efe09 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -1,4 +1,5 @@ from datetime import datetime, timedelta +import re import numpy as np import pytest @@ -8,6 +9,7 @@ import pandas as pd from pandas import DatetimeIndex, Period, PeriodIndex, Series, notna, period_range import pandas._testing as tm +from pandas.core.indexes.base import InvalidIndexError class TestGetItem: @@ -408,11 +410,7 @@ def test_get_loc(self): with pytest.raises(KeyError, match=r"^1\.1$"): idx0.get_loc(1.1) - msg = ( - r"'PeriodIndex\(\['2017-09-01', '2017-09-02', '2017-09-03'\], " - r"dtype='period\[D\]', freq='D'\)' is an invalid key" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(InvalidIndexError, match=re.escape(str(idx0))): idx0.get_loc(idx0) # get the location of p1/p2 from @@ -433,11 +431,7 @@ def test_get_loc(self): with pytest.raises(KeyError, match=r"^1\.1$"): idx1.get_loc(1.1) - msg = ( - r"'PeriodIndex\(\['2017-09-02', '2017-09-02', '2017-09-03'\], " - r"dtype='period\[D\]', freq='D'\)' is an invalid key" - ) - with pytest.raises(TypeError, match=msg): + with pytest.raises(InvalidIndexError, match=re.escape(str(idx1))): idx1.get_loc(idx1) # get the location of p1/p2 from @@ -461,16 +455,46 @@ def test_get_loc_integer(self): with pytest.raises(KeyError, match="46"): pi2.get_loc(46) + @pytest.mark.parametrize("freq", ["H", "D"]) + def test_get_value_datetime_hourly(self, freq): + # get_loc and get_value should treat datetime objects symmetrically + dti = pd.date_range("2016-01-01", periods=3, freq="MS") + pi = dti.to_period(freq) + ser = pd.Series(range(7, 10), index=pi) + + ts = dti[0] + + assert pi.get_loc(ts) == 0 + assert pi.get_value(ser, ts) == 7 + assert ser[ts] == 7 + assert ser.loc[ts] == 7 + + ts2 = ts + pd.Timedelta(hours=3) + if freq == "H": + with pytest.raises(KeyError, match="2016-01-01 03:00"): + pi.get_loc(ts2) + with pytest.raises(KeyError, match="2016-01-01 03:00"): + pi.get_value(ser, ts2) + with pytest.raises(KeyError, match="2016-01-01 03:00"): + ser[ts2] + with pytest.raises(KeyError, match="2016-01-01 03:00"): + ser.loc[ts2] + else: + assert pi.get_loc(ts2) == 0 + assert pi.get_value(ser, ts2) == 7 + assert ser[ts2] == 7 + assert ser.loc[ts2] == 7 + def test_get_value_integer(self): dti = pd.date_range("2016-01-01", periods=3) pi = dti.to_period("D") ser = pd.Series(range(3), index=pi) - with pytest.raises(IndexError, match="is out of bounds for axis 0 with size 3"): + with pytest.raises(IndexError, match="index out of bounds"): pi.get_value(ser, 16801) pi2 = dti.to_period("Y") # duplicates, ordinals are all 46 ser2 = pd.Series(range(3), index=pi2) - with pytest.raises(IndexError, match="is out of bounds for axis 0 with size 3"): + with pytest.raises(IndexError, match="index out of bounds"): pi2.get_value(ser2, 46) def test_is_monotonic_increasing(self): @@ -544,25 +568,25 @@ def test_get_value(self): p2 = pd.Period("2017-09-03") idx0 = pd.PeriodIndex([p0, p1, p2]) - input0 = np.array([1, 2, 3]) + input0 = pd.Series(np.array([1, 2, 3]), index=idx0) expected0 = 2 result0 = idx0.get_value(input0, p1) assert result0 == expected0 idx1 = pd.PeriodIndex([p1, p1, p2]) - input1 = np.array([1, 2, 3]) - expected1 = np.array([1, 2]) + input1 = pd.Series(np.array([1, 2, 3]), index=idx1) + expected1 = input1.iloc[[0, 1]] result1 = idx1.get_value(input1, p1) - tm.assert_numpy_array_equal(result1, expected1) + tm.assert_series_equal(result1, expected1) idx2 = pd.PeriodIndex([p1, p2, p1]) - input2 = np.array([1, 2, 3]) - expected2 = np.array([1, 3]) + input2 = pd.Series(np.array([1, 2, 3]), index=idx2) + expected2 = input2.iloc[[0, 2]] result2 = idx2.get_value(input2, p1) - tm.assert_numpy_array_equal(result2, expected2) + tm.assert_series_equal(result2, expected2) def test_get_indexer(self): # GH 17717