Skip to content

BUG: PeriodIndex.searchsorted accepting invalid inputs #30763

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jan 8, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,7 @@ Indexing
- :meth:`Index.get_indexer_non_unique` could fail with ``TypeError`` in some cases, such as when searching for ints in a string index (:issue:`28257`)
- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`)
- Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`)
- Bug in indexing with a :class:`PeriodIndex` incorrectly accepting integers representing years; use e.g. ``ser.loc["2007"]`` instead of ``ser.loc[2007]`` (:issue:`30763`)

Missing
^^^^^^^
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6976,8 +6976,7 @@ def asof(self, where, subset=None):
if not is_list:
start = self.index[0]
if isinstance(self.index, PeriodIndex):
where = Period(where, freq=self.index.freq).ordinal
start = start.ordinal
where = Period(where, freq=self.index.freq)

if where < start:
if not is_series:
Expand Down
17 changes: 9 additions & 8 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,17 +479,19 @@ def astype(self, dtype, copy=True, how="start"):
@Substitution(klass="PeriodIndex")
@Appender(_shared_docs["searchsorted"])
def searchsorted(self, value, side="left", sorter=None):
if isinstance(value, Period):
if value.freq != self.freq:
raise raise_on_incompatible(self, value)
value = value.ordinal
if isinstance(value, Period) or value is NaT:
self._data._check_compatible_with(value)
elif isinstance(value, str):
try:
value = Period(value, freq=self.freq).ordinal
value = Period(value, freq=self.freq)
except DateParseError:
raise KeyError(f"Cannot interpret '{value}' as period")
elif not isinstance(value, PeriodArray):
raise TypeError(
"PeriodIndex.searchsorted requires either a Period or PeriodArray"
)

return self._ndarray_values.searchsorted(value, side=side, sorter=sorter)
return self._data.searchsorted(value, side=side, sorter=sorter)

@property
def is_full(self) -> bool:
Expand Down Expand Up @@ -722,8 +724,7 @@ def _get_string_slice(self, key):

t1, t2 = self._parsed_string_to_bounds(reso, parsed)
return slice(
self.searchsorted(t1.ordinal, side="left"),
self.searchsorted(t2.ordinal, side="right"),
self.searchsorted(t1, side="left"), self.searchsorted(t2, side="right")
)

def _convert_tolerance(self, tolerance, target):
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1586,7 +1586,10 @@ def _get_period_bins(self, ax):
rng += freq_mult
# adjust bin edge indexes to account for base
rng -= bin_shift
bins = memb.searchsorted(rng, side="left")

# Wrap in PeriodArray for PeriodArray.searchsorted
prng = type(memb._data)(rng, dtype=memb.dtype)
bins = memb.searchsorted(prng, side="left")

if nat_count > 0:
# NaT handling as in pandas._lib.lib.generate_bins_dt64()
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/frame/methods/test_asof.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from pandas import DataFrame, Series, Timestamp, date_range, to_datetime
from pandas import DataFrame, Period, Series, Timestamp, date_range, to_datetime
import pandas._testing as tm


Expand Down Expand Up @@ -80,6 +80,12 @@ def test_missing(self, date_range_frame):
)
tm.assert_frame_equal(result, expected)

# Check that we handle PeriodIndex correctly, don't end up with
# period.ordinal for series name
df = df.to_period("D")
result = df.asof("1989-12-31")
assert isinstance(result.name, Period)

def test_all_nans(self, date_range_frame):
# GH 15713
# DataFrame is all nans
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/period/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,7 @@ def test_index_duplicate_periods(self):
idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN")
ts = Series(np.random.randn(len(idx)), index=idx)

result = ts[2007]
result = ts["2007"]
expected = ts[1:3]
tm.assert_series_equal(result, expected)
result[:] = 1
Expand All @@ -461,7 +461,7 @@ def test_index_duplicate_periods(self):
idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN")
ts = Series(np.random.randn(len(idx)), index=idx)

result = ts[2007]
result = ts["2007"]
expected = ts[idx == "2007"]
tm.assert_series_equal(result, expected)

Expand Down
33 changes: 31 additions & 2 deletions pandas/tests/indexes/period/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,14 +231,43 @@ def test_searchsorted(self, freq):
p2 = pd.Period("2014-01-04", freq=freq)
assert pidx.searchsorted(p2) == 3

msg = "Input has different freq=H from PeriodIndex"
assert pidx.searchsorted(pd.NaT) == 0

msg = "Input has different freq=H from PeriodArray"
with pytest.raises(IncompatibleFrequency, match=msg):
pidx.searchsorted(pd.Period("2014-01-01", freq="H"))

msg = "Input has different freq=5D from PeriodIndex"
msg = "Input has different freq=5D from PeriodArray"
with pytest.raises(IncompatibleFrequency, match=msg):
pidx.searchsorted(pd.Period("2014-01-01", freq="5D"))

def test_searchsorted_invalid(self):
pidx = pd.PeriodIndex(
["2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05"],
freq="D",
)

other = np.array([0, 1], dtype=np.int64)

msg = "requires either a Period or PeriodArray"
with pytest.raises(TypeError, match=msg):
pidx.searchsorted(other)

with pytest.raises(TypeError, match=msg):
pidx.searchsorted(other.astype("timedelta64[ns]"))

with pytest.raises(TypeError, match=msg):
pidx.searchsorted(np.timedelta64(4))

with pytest.raises(TypeError, match=msg):
pidx.searchsorted(np.timedelta64("NaT", "ms"))

with pytest.raises(TypeError, match=msg):
pidx.searchsorted(np.datetime64(4, "ns"))

with pytest.raises(TypeError, match=msg):
pidx.searchsorted(np.datetime64("NaT", "ns"))


class TestPeriodIndexConversion:
def test_tolist(self):
Expand Down