Skip to content

Commit a3477c7

Browse files
authored
BUG: List indexer on PeriodIndex doesn't coerce strings (#30515)
1 parent dd84044 commit a3477c7

File tree

4 files changed

+140
-3
lines changed

4 files changed

+140
-3
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,7 @@ Indexing
635635
- Bug in :meth:`Series.__setitem__` with an :class:`IntervalIndex` and a list-like key of integers (:issue:`33473`)
636636
- Bug in :meth:`Series.__getitem__` allowing missing labels with ``np.ndarray``, :class:`Index`, :class:`Series` indexers but not ``list``, these now all raise ``KeyError`` (:issue:`33646`)
637637
- Bug in :meth:`DataFrame.truncate` and :meth:`Series.truncate` where index was assumed to be monotone increasing (:issue:`33756`)
638+
- Bug where indexing with a list of strings representing datetimes failed on :class:`DatetimeIndex` or :class:`PeriodIndex` (:issue:`11278`)
638639

639640
Missing
640641
^^^^^^^

pandas/core/indexes/datetimelike.py

+19
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
3030
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
3131
from pandas.core.base import IndexOpsMixin
32+
import pandas.core.common as com
33+
from pandas.core.construction import array as pd_array, extract_array
3234
import pandas.core.indexes.base as ibase
3335
from pandas.core.indexes.base import Index, _index_shared_docs
3436
from pandas.core.indexes.extension import (
@@ -39,6 +41,7 @@
3941
from pandas.core.indexes.numeric import Int64Index
4042
from pandas.core.ops import get_op_result_name
4143
from pandas.core.sorting import ensure_key_mapped
44+
from pandas.core.tools.datetimes import DateParseError
4245
from pandas.core.tools.timedeltas import to_timedelta
4346

4447
from pandas.tseries.offsets import DateOffset, Tick
@@ -573,6 +576,22 @@ def _wrap_joined_index(self, joined: np.ndarray, other):
573576

574577
return type(self)._simple_new(new_data, name=name)
575578

579+
@doc(Index._convert_arr_indexer)
def _convert_arr_indexer(self, keyarr):
    # NOTE(review): no inline docstring on purpose -- presumably ``@doc``
    # supplies the shared one from ``Index._convert_arr_indexer``; confirm
    # before adding one here.

    # Keys that are not inferred as all-strings take the generic
    # tuple-safe array conversion.
    if lib.infer_dtype(keyarr) != "string":
        return com.asarray_tuplesafe(keyarr)

    # Weak heuristic: a list of strings is taken to hold labels written as
    # datetimes / timedeltas / periods, so try to build an array of this
    # index's own dtype from them.
    try:
        parsed = pd_array(keyarr, self.dtype)
    except (ValueError, DateParseError):
        # The strings could not be interpreted as ``self.dtype``; hand the
        # key back exactly as it was received.
        return keyarr

    return extract_array(parsed, extract_numpy=True)
594+
576595

577596
class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index):
578597
"""

pandas/core/indexing.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1228,11 +1228,13 @@ def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False):
12281228
indexer, keyarr = ax._convert_listlike_indexer(key)
12291229
# We only act on all found values:
12301230
if indexer is not None and (indexer != -1).all():
1231-
self._validate_read_indexer(key, indexer, axis, raise_missing=raise_missing)
1231+
self._validate_read_indexer(
1232+
keyarr, indexer, axis, raise_missing=raise_missing
1233+
)
12321234
return ax[indexer], indexer
12331235

12341236
if ax.is_unique and not getattr(ax, "is_overlapping", False):
1235-
indexer = ax.get_indexer_for(key)
1237+
indexer = ax.get_indexer_for(keyarr)
12361238
keyarr = ax.reindex(keyarr)[0]
12371239
else:
12381240
keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)

pandas/tests/indexing/test_partial.py

+116-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import pytest
99

1010
import pandas as pd
11-
from pandas import DataFrame, Index, Series, date_range
11+
from pandas import DataFrame, Index, Period, Series, Timestamp, date_range, period_range
1212
import pandas._testing as tm
1313

1414

@@ -535,3 +535,118 @@ def test_partial_set_empty_frame_empty_consistencies(self):
535535
df.loc[0, "x"] = 1
536536
expected = DataFrame(dict(x=[1], y=[np.nan]))
537537
tm.assert_frame_equal(df, expected, check_dtype=False)
538+
539+
@pytest.mark.parametrize(
    "idx,labels,expected_idx",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Period("2000-01-04", freq="D"),
                Period("2000-01-08", freq="D"),
                Period("2000-01-12", freq="D"),
            ],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Timestamp("2000-01-04", freq="D"),
                Timestamp("2000-01-08", freq="D"),
                Timestamp("2000-01-12", freq="D"),
            ],
        ),
        (
            pd.timedelta_range(start="1 day", periods=20),
            ["4D", "8D", "12D"],
            [pd.Timedelta("4 day"), pd.Timedelta("8 day"), pd.Timedelta("12 day")],
        ),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes(
    self, idx, labels, expected_idx
):
    """
    A list of string labels selects the matching rows.

    Checked via ``Series.loc``, ``Series.__getitem__`` and ``DataFrame.loc``
    for each parametrized period / datetime / timedelta index.
    """
    # GH 11278
    ser = Series(range(20), index=idx)
    frame = DataFrame(range(20), index=idx)

    # Values 0..19 sit on the 20 index entries, so the three requested
    # labels correspond to positions 3, 7 and 11.
    picked = [3, 7, 11]
    want_ser = Series(picked, expected_idx)
    want_frame = DataFrame(picked, expected_idx)

    tm.assert_series_equal(want_ser, ser.loc[labels])
    tm.assert_series_equal(want_ser, ser[labels])
    tm.assert_frame_equal(want_frame, frame.loc[labels])
581+
582+
@pytest.mark.parametrize(
    "idx,labels",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
        ),
        (pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes_missing_value(
    self, idx, labels
):
    """
    A list of string labels containing an absent label raises ``KeyError``.

    In each case the second label lies beyond the 20-entry index.
    """
    # GH 11278
    ser = Series(range(20), index=idx)
    frame = DataFrame(range(20), index=idx)
    pattern = r"with any missing labels"

    # Same three access paths as the success test, checked in the same order.
    lookups = (
        lambda: ser.loc[labels],
        lambda: ser[labels],
        lambda: frame.loc[labels],
    )
    for lookup in lookups:
        with pytest.raises(KeyError, match=pattern):
            lookup()
610+
611+
@pytest.mark.parametrize(
    "idx,labels,msg",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["4D", "8D"],
            (
                r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                r"are in the \[index\]"
            ),
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["4D", "8D"],
            (
                r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] "
                r"are in the \[index\]"
            ),
        ),
        (
            pd.timedelta_range(start="1 day", periods=20),
            ["2000-01-04", "2000-01-08"],
            (
                r"None of \[Index\(\['2000-01-04', '2000-01-08'\], "
                r"dtype='object'\)\] are in the \[index\]"
            ),
        ),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes_not_matched_type(
    self, idx, labels, msg
):
    """
    String labels of the wrong kind for the index raise ``KeyError``.

    Timedelta-style strings are used against period / datetime indexes and
    date-style strings against a timedelta index; the error message must
    match the parametrized ``msg`` pattern.
    """
    # GH 11278
    ser = Series(range(20), index=idx)
    frame = DataFrame(range(20), index=idx)

    # Exercise .loc and [] on the Series, then .loc on the DataFrame.
    lookups = (
        lambda: ser.loc[labels],
        lambda: ser[labels],
        lambda: frame.loc[labels],
    )
    for lookup in lookups:
        with pytest.raises(KeyError, match=msg):
            lookup()

0 commit comments

Comments
 (0)