Skip to content

Commit 4050e4c

Browse files
jbrockmendel authored and WillAyd committed
ENH: partial string indexing on non-monotonic PeriodIndex (#31096)
1 parent 7ffcf9d commit 4050e4c

File tree

4 files changed

+108
-17
lines changed

4 files changed

+108
-17
lines changed

doc/source/user_guide/timeseries.rst

+5
Original file line numberDiff line numberDiff line change
@@ -1951,6 +1951,10 @@ The ``period`` dtype can be used in ``.astype(...)``. It allows one to change th
19511951
PeriodIndex partial string indexing
19521952
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
19531953

1954+
PeriodIndex now supports partial string slicing with non-monotonic indexes.
1955+
1956+
.. versionadded:: 1.1.0
1957+
19541958
You can pass in dates and strings to ``Series`` and ``DataFrame`` with ``PeriodIndex``, in the same manner as ``DatetimeIndex``. For details, refer to :ref:`DatetimeIndex Partial String Indexing <timeseries.partialindexing>`.
19551959

19561960
.. ipython:: python
@@ -1981,6 +1985,7 @@ As with ``DatetimeIndex``, the endpoints will be included in the result. The exa
19811985
19821986
dfp['2013-01-01 10H':'2013-01-01 11H']
19831987
1988+
19841989
Frequency conversion and resampling with PeriodIndex
19851990
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
19861991
The frequency of ``Period`` and ``PeriodIndex`` can be converted via the ``asfreq``

doc/source/whatsnew/v1.1.0.rst

+21
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,27 @@ including other versions of pandas.
1313
Enhancements
1414
~~~~~~~~~~~~
1515

16+
.. _whatsnew_110.period_index_partial_string_slicing:
17+
18+
Nonmonotonic PeriodIndex Partial String Slicing
19+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
20+
:class:`PeriodIndex` now supports partial string slicing for non-monotonic indexes, mirroring :class:`DatetimeIndex` behavior (:issue:`31096`).
21+
22+
For example:
23+
24+
.. ipython:: python
25+
26+
dti = pd.date_range("2014-01-01", periods=30, freq="30D")
27+
pi = dti.to_period("D")
28+
ser_monotonic = pd.Series(np.arange(30), index=pi)
29+
shuffler = list(range(0, 30, 2)) + list(range(1, 31, 2))
30+
ser = ser_monotonic[shuffler]
31+
ser
32+
33+
.. ipython:: python
34+
ser["2014"]
35+
ser.loc["May 2015"]
36+
1637
.. _whatsnew_110.enhancements.other:
1738

1839
Other enhancements

pandas/core/indexes/period.py

+32-14
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,11 @@ def get_loc(self, key, method=None, tolerance=None):
567567
"""
568568

569569
if isinstance(key, str):
570+
try:
571+
return self._get_string_slice(key)
572+
except (TypeError, KeyError, ValueError, OverflowError):
573+
pass
574+
570575
try:
571576
asdt, reso = parse_time_string(key, self.freq)
572577
key = asdt
@@ -648,10 +653,6 @@ def _parsed_string_to_bounds(self, reso: str, parsed: datetime):
648653

649654
def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True):
650655
# TODO: Check for non-True use_lhs/use_rhs
651-
raw = key
652-
if not self.is_monotonic:
653-
raise ValueError("Partial indexing only valid for ordered time series")
654-
655656
parsed, reso = parse_time_string(key, self.freq)
656657
grp = resolution.Resolution.get_freq_group(reso)
657658
freqn = resolution.get_freq_group(self.freq)
@@ -660,18 +661,35 @@ def _get_string_slice(self, key: str, use_lhs: bool = True, use_rhs: bool = True
660661
# TODO: we used to also check for
661662
# reso in ["day", "hour", "minute", "second"]
662663
# why is that check not needed?
663-
raise TypeError(key)
664+
raise ValueError(key)
664665

665666
t1, t2 = self._parsed_string_to_bounds(reso, parsed)
666-
if len(self):
667-
if t2 < self.min() or t1 > self.max():
668-
raise KeyError(raw)
669-
670-
# Use asi8 searchsorted to avoid overhead of re-validating inputs
671-
return slice(
672-
self.asi8.searchsorted(t1.ordinal, side="left"),
673-
self.asi8.searchsorted(t2.ordinal, side="right"),
674-
)
667+
i8vals = self.asi8
668+
669+
if self.is_monotonic:
670+
671+
# we are out of range
672+
if len(self) and (
673+
(use_lhs and t1 < self[0] and t2 < self[0])
674+
or ((use_rhs and t1 > self[-1] and t2 > self[-1]))
675+
):
676+
raise KeyError(key)
677+
678+
# TODO: does this depend on being monotonic _increasing_?
679+
# If so, DTI will also be affected.
680+
681+
# a monotonic (sorted) series can be sliced
682+
# Use asi8.searchsorted to avoid re-validating Periods
683+
left = i8vals.searchsorted(t1.ordinal, side="left") if use_lhs else None
684+
right = i8vals.searchsorted(t2.ordinal, side="right") if use_rhs else None
685+
return slice(left, right)
686+
687+
else:
688+
lhs_mask = (i8vals >= t1.ordinal) if use_lhs else True
689+
rhs_mask = (i8vals <= t2.ordinal) if use_rhs else True
690+
691+
# try to find the dates
692+
return (lhs_mask & rhs_mask).nonzero()[0]
675693

676694
def _convert_tolerance(self, tolerance, target):
677695
tolerance = DatetimeIndexOpsMixin._convert_tolerance(self, tolerance, target)

pandas/tests/indexes/period/test_partial_slicing.py

+50-3
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77

88

99
class TestPeriodIndex:
10-
def setup_method(self, method):
11-
pass
12-
1310
def test_slice_with_negative_step(self):
1411
ts = Series(np.arange(20), period_range("2014-01", periods=20, freq="M"))
1512
SLC = pd.IndexSlice
@@ -133,3 +130,53 @@ def test_range_slice_outofbounds(self):
133130
tm.assert_frame_equal(df["2013/10/15":"2013/10/17"], empty)
134131
tm.assert_frame_equal(df["2013-06":"2013-09"], empty)
135132
tm.assert_frame_equal(df["2013-11":"2013-12"], empty)
133+
134+
def test_partial_slice_doesnt_require_monotonicity(self):
135+
# See also: DatetimeIndex test of the same name
136+
dti = pd.date_range("2014-01-01", periods=30, freq="30D")
137+
pi = dti.to_period("D")
138+
139+
ser_montonic = pd.Series(np.arange(30), index=pi)
140+
141+
shuffler = list(range(0, 30, 2)) + list(range(1, 31, 2))
142+
ser = ser_montonic[shuffler]
143+
nidx = ser.index
144+
145+
# Manually identified locations of year==2014
146+
indexer_2014 = np.array(
147+
[0, 1, 2, 3, 4, 5, 6, 15, 16, 17, 18, 19, 20], dtype=np.intp
148+
)
149+
assert (nidx[indexer_2014].year == 2014).all()
150+
assert not (nidx[~indexer_2014].year == 2014).any()
151+
152+
result = nidx.get_loc("2014")
153+
tm.assert_numpy_array_equal(result, indexer_2014)
154+
155+
expected = ser[indexer_2014]
156+
157+
result = nidx.get_value(ser, "2014")
158+
tm.assert_series_equal(result, expected)
159+
160+
result = ser.loc["2014"]
161+
tm.assert_series_equal(result, expected)
162+
163+
result = ser["2014"]
164+
tm.assert_series_equal(result, expected)
165+
166+
# Manually identified locations where ser.index is within May 2015
167+
indexer_may2015 = np.array([23], dtype=np.intp)
168+
assert nidx[23].year == 2015 and nidx[23].month == 5
169+
170+
result = nidx.get_loc("May 2015")
171+
tm.assert_numpy_array_equal(result, indexer_may2015)
172+
173+
expected = ser[indexer_may2015]
174+
175+
result = nidx.get_value(ser, "May 2015")
176+
tm.assert_series_equal(result, expected)
177+
178+
result = ser.loc["May 2015"]
179+
tm.assert_series_equal(result, expected)
180+
181+
result = ser["May 2015"]
182+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)