Skip to content

Commit 511a284

Browse files
jbrockmendel authored and jreback committed
BUG: PeriodIndex.searchsorted accepting invalid inputs (#30763)
1 parent f2b213c commit 511a284

File tree

7 files changed

+55
-16
lines changed

7 files changed

+55
-16
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -937,6 +937,7 @@ Indexing
937937
- :meth:`Index.get_indexer_non_unique` could fail with `TypeError` in some cases, such as when searching for ints in a string index (:issue:`28257`)
938938
- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`)
939939
- Bug in :meth:`Series.__setitem__` incorrectly assigning values with boolean indexer when the length of new data matches the number of ``True`` values and new data is not a ``Series`` or an ``np.array`` (:issue:`30567`)
940+
- Bug in indexing with a :class:`PeriodIndex` incorrectly accepting integers representing years. Use e.g. ``ser.loc["2007"]`` instead of ``ser.loc[2007]`` (:issue:`30763`)
940941

941942
Missing
942943
^^^^^^^

pandas/core/generic.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -6972,8 +6972,7 @@ def asof(self, where, subset=None):
69726972
if not is_list:
69736973
start = self.index[0]
69746974
if isinstance(self.index, PeriodIndex):
6975-
where = Period(where, freq=self.index.freq).ordinal
6976-
start = start.ordinal
6975+
where = Period(where, freq=self.index.freq)
69776976

69786977
if where < start:
69796978
if not is_series:

pandas/core/indexes/period.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -469,17 +469,19 @@ def astype(self, dtype, copy=True, how="start"):
469469
@Substitution(klass="PeriodIndex")
470470
@Appender(_shared_docs["searchsorted"])
471471
def searchsorted(self, value, side="left", sorter=None):
472-
if isinstance(value, Period):
473-
if value.freq != self.freq:
474-
raise raise_on_incompatible(self, value)
475-
value = value.ordinal
472+
if isinstance(value, Period) or value is NaT:
473+
self._data._check_compatible_with(value)
476474
elif isinstance(value, str):
477475
try:
478-
value = Period(value, freq=self.freq).ordinal
476+
value = Period(value, freq=self.freq)
479477
except DateParseError:
480478
raise KeyError(f"Cannot interpret '{value}' as period")
479+
elif not isinstance(value, PeriodArray):
480+
raise TypeError(
481+
"PeriodIndex.searchsorted requires either a Period or PeriodArray"
482+
)
481483

482-
return self._ndarray_values.searchsorted(value, side=side, sorter=sorter)
484+
return self._data.searchsorted(value, side=side, sorter=sorter)
483485

484486
@property
485487
def is_full(self) -> bool:
@@ -703,8 +705,7 @@ def _get_string_slice(self, key):
703705

704706
t1, t2 = self._parsed_string_to_bounds(reso, parsed)
705707
return slice(
706-
self.searchsorted(t1.ordinal, side="left"),
707-
self.searchsorted(t2.ordinal, side="right"),
708+
self.searchsorted(t1, side="left"), self.searchsorted(t2, side="right")
708709
)
709710

710711
def _convert_tolerance(self, tolerance, target):

pandas/core/resample.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1586,7 +1586,10 @@ def _get_period_bins(self, ax):
15861586
rng += freq_mult
15871587
# adjust bin edge indexes to account for base
15881588
rng -= bin_shift
1589-
bins = memb.searchsorted(rng, side="left")
1589+
1590+
# Wrap in PeriodArray for PeriodArray.searchsorted
1591+
prng = type(memb._data)(rng, dtype=memb.dtype)
1592+
bins = memb.searchsorted(prng, side="left")
15901593

15911594
if nat_count > 0:
15921595
# NaT handling as in pandas._libs.lib.generate_bins_dt64()

pandas/tests/frame/methods/test_asof.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22
import pytest
33

4-
from pandas import DataFrame, Series, Timestamp, date_range, to_datetime
4+
from pandas import DataFrame, Period, Series, Timestamp, date_range, to_datetime
55
import pandas._testing as tm
66

77

@@ -80,6 +80,12 @@ def test_missing(self, date_range_frame):
8080
)
8181
tm.assert_frame_equal(result, expected)
8282

83+
# Check that we handle PeriodIndex correctly, don't end up with
84+
# period.ordinal for series name
85+
df = df.to_period("D")
86+
result = df.asof("1989-12-31")
87+
assert isinstance(result.name, Period)
88+
8389
def test_all_nans(self, date_range_frame):
8490
# GH 15713
8591
# DataFrame is all nans

pandas/tests/indexes/period/test_period.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ def test_index_duplicate_periods(self):
451451
idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN")
452452
ts = Series(np.random.randn(len(idx)), index=idx)
453453

454-
result = ts[2007]
454+
result = ts["2007"]
455455
expected = ts[1:3]
456456
tm.assert_series_equal(result, expected)
457457
result[:] = 1
@@ -461,7 +461,7 @@ def test_index_duplicate_periods(self):
461461
idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN")
462462
ts = Series(np.random.randn(len(idx)), index=idx)
463463

464-
result = ts[2007]
464+
result = ts["2007"]
465465
expected = ts[idx == "2007"]
466466
tm.assert_series_equal(result, expected)
467467

pandas/tests/indexes/period/test_tools.py

+31-2
Original file line numberDiff line numberDiff line change
@@ -231,14 +231,43 @@ def test_searchsorted(self, freq):
231231
p2 = pd.Period("2014-01-04", freq=freq)
232232
assert pidx.searchsorted(p2) == 3
233233

234-
msg = "Input has different freq=H from PeriodIndex"
234+
assert pidx.searchsorted(pd.NaT) == 0
235+
236+
msg = "Input has different freq=H from PeriodArray"
235237
with pytest.raises(IncompatibleFrequency, match=msg):
236238
pidx.searchsorted(pd.Period("2014-01-01", freq="H"))
237239

238-
msg = "Input has different freq=5D from PeriodIndex"
240+
msg = "Input has different freq=5D from PeriodArray"
239241
with pytest.raises(IncompatibleFrequency, match=msg):
240242
pidx.searchsorted(pd.Period("2014-01-01", freq="5D"))
241243

244+
def test_searchsorted_invalid(self):
245+
pidx = pd.PeriodIndex(
246+
["2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05"],
247+
freq="D",
248+
)
249+
250+
other = np.array([0, 1], dtype=np.int64)
251+
252+
msg = "requires either a Period or PeriodArray"
253+
with pytest.raises(TypeError, match=msg):
254+
pidx.searchsorted(other)
255+
256+
with pytest.raises(TypeError, match=msg):
257+
pidx.searchsorted(other.astype("timedelta64[ns]"))
258+
259+
with pytest.raises(TypeError, match=msg):
260+
pidx.searchsorted(np.timedelta64(4))
261+
262+
with pytest.raises(TypeError, match=msg):
263+
pidx.searchsorted(np.timedelta64("NaT", "ms"))
264+
265+
with pytest.raises(TypeError, match=msg):
266+
pidx.searchsorted(np.datetime64(4, "ns"))
267+
268+
with pytest.raises(TypeError, match=msg):
269+
pidx.searchsorted(np.datetime64("NaT", "ns"))
270+
242271

243272
class TestPeriodIndexConversion:
244273
def test_tolist(self):

0 commit comments

Comments
 (0)