Skip to content

REF: PeriodIndex.get_loc #31021

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jan 15, 2020
2 changes: 1 addition & 1 deletion pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,7 @@ cdef class TimedeltaEngine(DatetimeEngine):
cdef class PeriodEngine(Int64Engine):

cdef _get_index_values(self):
return super(PeriodEngine, self).vgetter()
return super(PeriodEngine, self).vgetter().view("i8")

cdef void _call_map_locations(self, values):
# super(...) pattern doesn't seem to work with `cdef`
Expand Down
121 changes: 69 additions & 52 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np

from pandas._libs import index as libindex
from pandas._libs.tslibs import NaT, frequencies as libfrequencies, iNaT, resolution
from pandas._libs.tslibs import NaT, frequencies as libfrequencies, resolution
from pandas._libs.tslibs.period import Period
from pandas.util._decorators import Appender, Substitution, cache_readonly

Expand All @@ -17,6 +17,7 @@
is_float_dtype,
is_integer,
is_integer_dtype,
is_list_like,
is_object_dtype,
pandas_dtype,
)
Expand All @@ -42,7 +43,6 @@
)
from pandas.core.indexes.datetimes import DatetimeIndex, Index
from pandas.core.indexes.numeric import Int64Index
from pandas.core.missing import isna
from pandas.core.ops import get_op_result_name
from pandas.core.tools.datetimes import DateParseError, parse_time_string

Expand Down Expand Up @@ -507,42 +507,43 @@ def get_value(self, series, key):
Fast lookup of value from 1-dimensional ndarray. Only use this if you
know what you're doing
"""
s = com.values_from_object(series)
try:
value = super().get_value(s, key)
except (KeyError, IndexError):
if isinstance(key, str):
asdt, parsed, reso = parse_time_string(key, self.freq)
grp = resolution.Resolution.get_freq_group(reso)
freqn = resolution.get_freq_group(self.freq)

vals = self._ndarray_values

# if our data is higher resolution than requested key, slice
if grp < freqn:
iv = Period(asdt, freq=(grp, 1))
ord1 = iv.asfreq(self.freq, how="S").ordinal
ord2 = iv.asfreq(self.freq, how="E").ordinal

if ord2 < vals[0] or ord1 > vals[-1]:
raise KeyError(key)

pos = np.searchsorted(self._ndarray_values, [ord1, ord2])
key = slice(pos[0], pos[1] + 1)
return series[key]
elif grp == freqn:
key = Period(asdt, freq=self.freq).ordinal
return com.maybe_box(
self, self._int64index.get_value(s, key), series, key
)
else:
if is_integer(key):
return series.iat[key]

if isinstance(key, str):
asdt, parsed, reso = parse_time_string(key, self.freq)
grp = resolution.Resolution.get_freq_group(reso)
freqn = resolution.get_freq_group(self.freq)

vals = self._ndarray_values

# if our data is higher resolution than requested key, slice
if grp < freqn:
iv = Period(asdt, freq=(grp, 1))
ord1 = iv.asfreq(self.freq, how="S").ordinal
ord2 = iv.asfreq(self.freq, how="E").ordinal

if ord2 < vals[0] or ord1 > vals[-1]:
raise KeyError(key)

period = Period(key, self.freq)
key = period.value if isna(period) else period.ordinal
return com.maybe_box(self, self._int64index.get_value(s, key), series, key)
else:
return com.maybe_box(self, value, series, key)
pos = np.searchsorted(self._ndarray_values, [ord1, ord2])
key = slice(pos[0], pos[1] + 1)
return series[key]
elif grp == freqn:
key = Period(asdt, freq=self.freq)
loc = self.get_loc(key)
return series.iloc[loc]
else:
raise KeyError(key)

elif isinstance(key, Period) or key is NaT:
ordinal = key.ordinal if key is not NaT else NaT.value
loc = self._engine.get_loc(ordinal)
return series[loc]

# slice, PeriodIndex, np.ndarray, List[Period]
value = Index.get_value(self, series, key)
return com.maybe_box(self, value, series, key)

@Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
def get_indexer(self, target, method=None, limit=None, tolerance=None):
Expand Down Expand Up @@ -579,36 +580,52 @@ def get_indexer_non_unique(self, target):

def get_loc(self, key, method=None, tolerance=None):
"""
Get integer location for requested label
Get integer location for requested label.

Parameters
----------
key : Period, NaT, str, or datetime
String or datetime key must be parseable as Period.

Returns
-------
loc : int
loc : int or ndarray[int64]

Raises
------
KeyError
Key is not present in the index.
TypeError
If key is listlike or otherwise not hashable.
"""
try:
return self._engine.get_loc(key)
except KeyError:
if is_integer(key):
raise

if isinstance(key, str):
try:
asdt, parsed, reso = parse_time_string(key, self.freq)
key = asdt
except TypeError:
pass
except DateParseError:
# A string with invalid format
raise KeyError(f"Cannot interpret '{key}' as period")

try:
key = Period(key, freq=self.freq)
except ValueError:
# we cannot construct the Period
# as we have an invalid type
raise KeyError(key)
elif is_integer(key):
# Period constructor will cast to string, which we don't want
raise KeyError(key)

try:
key = Period(key, freq=self.freq)
except ValueError:
# we cannot construct the Period
# as we have an invalid type
if is_list_like(key):
raise TypeError(f"'{key}' is an invalid key")
raise KeyError(key)

ordinal = key.ordinal if key is not NaT else key.value
try:
return self._engine.get_loc(ordinal)
except KeyError:

try:
ordinal = iNaT if key is NaT else key.ordinal
if tolerance is not None:
tolerance = self._convert_tolerance(tolerance, np.asarray(key))
return self._int64index.get_loc(ordinal, method, tolerance)
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/indexes/period/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,28 @@ def test_get_loc(self):
tm.assert_numpy_array_equal(idx2.get_loc(p2), expected_idx2_p2)
tm.assert_numpy_array_equal(idx2.get_loc(str(p2)), expected_idx2_p2)

def test_get_loc_integer(self):
    """Check that ``PeriodIndex.get_loc`` rejects plain integer keys.

    An integer must raise ``KeyError`` (with the integer in the message)
    for both a unique daily index and a non-unique yearly index.
    """
    days = pd.date_range("2016-01-01", periods=3)

    # Unique index: presumably 16801 is the ordinal of the first daily
    # period, which is what makes this lookup interesting -- TODO confirm.
    daily = days.to_period("D")
    with pytest.raises(KeyError, match="16801"):
        daily.get_loc(16801)

    # Non-unique index: all three dates fall in the same year, so the
    # yearly periods are duplicates whose ordinals are all 46.
    yearly = days.to_period("Y")
    with pytest.raises(KeyError, match="46"):
        yearly.get_loc(46)

def test_get_value_integer(self):
    """Check that ``PeriodIndex.get_value`` fails on large integer keys.

    For both a unique daily index and a non-unique yearly index, an
    integer key beyond the length of the 3-element Series must raise an
    out-of-bounds ``IndexError``.
    """
    days = pd.date_range("2016-01-01", periods=3)
    out_of_bounds = "is out of bounds for axis 0 with size 3"

    # Unique daily index.
    daily = days.to_period("D")
    daily_ser = pd.Series(range(3), index=daily)
    with pytest.raises(IndexError, match=out_of_bounds):
        daily.get_value(daily_ser, 16801)

    # Yearly index: duplicates, ordinals are all 46.
    yearly = days.to_period("Y")
    yearly_ser = pd.Series(range(3), index=yearly)
    with pytest.raises(IndexError, match=out_of_bounds):
        yearly.get_value(yearly_ser, 46)

def test_is_monotonic_increasing(self):
# GH 17717
p0 = pd.Period("2017-09-01")
Expand Down