Skip to content

Commit cb53b55

Browse files
committed
BUG: List indexer on PeriodIndex doesn't coerce strings (#30515)
1 parent 7673357 commit cb53b55

File tree

4 files changed

+96
-3
lines changed

4 files changed

+96
-3
lines changed

doc/source/whatsnew/v1.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,8 @@ Indexing
359359
- Bug in :class:`Index` constructor where an unhelpful error message was raised for ``numpy`` scalars (:issue:`33017`)
360360
- Bug in :meth:`DataFrame.lookup` incorrectly raising an ``AttributeError`` when ``frame.index`` or ``frame.columns`` is not unique; this will now raise a ``ValueError`` with a helpful error message (:issue:`33041`)
361361
- Bug in :meth:`DataFrame.iloc.__setitem__` creating a new array instead of overwriting ``Categorical`` values in-place (:issue:`32831`)
362+
- Bug in indexing with a list of strings representing datetimes, which failed on :class:`PeriodIndex` or :class:`DatetimeIndex` (:issue:`11278`)
363+
-
362364

363365
Missing
364366
^^^^^^^

pandas/core/indexes/datetimelike.py

+12
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
3333
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
3434
from pandas.core.base import IndexOpsMixin
35+
import pandas.core.common as com
36+
from pandas.core.construction import array as pd_array, extract_array
3537
import pandas.core.indexes.base as ibase
3638
from pandas.core.indexes.base import Index, _index_shared_docs, ensure_index
3739
from pandas.core.indexes.extension import (
@@ -602,6 +604,16 @@ def delete(self, loc):
602604
arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq)
603605
return type(self)._simple_new(arr, name=self.name)
604606

607+
def _convert_arr_indexer(self, keyarr):
608+
if lib.infer_dtype(keyarr) == "string":
609+
# weak reasoning that indexer is a list of strings
610+
# representing datetime or timedelta or period
611+
extension_arr = pd_array(keyarr, self.dtype)
612+
converted_arr = extract_array(extension_arr, extract_numpy=True)
613+
else:
614+
converted_arr = com.asarray_tuplesafe(keyarr)
615+
return converted_arr
616+
605617

606618
class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index):
607619
"""

pandas/core/indexing.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1259,11 +1259,13 @@ def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False):
12591259
indexer, keyarr = ax._convert_listlike_indexer(key)
12601260
# We only act on all found values:
12611261
if indexer is not None and (indexer != -1).all():
1262-
self._validate_read_indexer(key, indexer, axis, raise_missing=raise_missing)
1262+
self._validate_read_indexer(
1263+
keyarr, indexer, axis, raise_missing=raise_missing
1264+
)
12631265
return ax[indexer], indexer
12641266

12651267
if ax.is_unique and not getattr(ax, "is_overlapping", False):
1266-
indexer = ax.get_indexer_for(key)
1268+
indexer = ax.get_indexer_for(keyarr)
12671269
keyarr = ax.reindex(keyarr)[0]
12681270
else:
12691271
keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)

pandas/tests/indexing/test_loc.py

+78-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
import pandas as pd
9-
from pandas import DataFrame, Series, Timestamp, date_range
9+
from pandas import DataFrame, Period, Series, Timestamp, date_range, period_range
1010
import pandas._testing as tm
1111
from pandas.api.types import is_scalar
1212
from pandas.tests.indexing.common import Base
@@ -956,6 +956,83 @@ def test_loc_reverse_assignment(self):
956956

957957
tm.assert_series_equal(result, expected)
958958

959+
@pytest.mark.parametrize(
    "idx,labels,expected_idx",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Period(day, freq="D")
                for day in ("2000-01-04", "2000-01-08", "2000-01-12")
            ],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Timestamp(day, freq="D")
                for day in ("2000-01-04", "2000-01-08", "2000-01-12")
            ],
        ),
        (
            pd.timedelta_range(start="1 day", periods=20),
            ["4D", "8D", "12D"],
            [pd.Timedelta(span) for span in ("4 day", "8 day", "12 day")],
        ),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes(
    self, idx, labels, expected_idx
):
    """
    Selecting with a list of string labels on a datetime-like index.

    GH 11278: ``Series.loc``, ``Series.__getitem__`` and ``DataFrame.loc``
    are each compared against an object built from ``expected_idx``.
    """
    # Positions 3, 7 and 11 of range(20) are the rows named by ``labels``.
    picked_values = [3, 7, 11]

    ser = Series(range(20), index=idx)
    frame = DataFrame(range(20), index=idx)

    want_ser = Series(picked_values, expected_idx)
    want_frame = DataFrame(picked_values, expected_idx)

    tm.assert_series_equal(want_ser, ser.loc[labels])
    tm.assert_series_equal(want_ser, ser[labels])
    tm.assert_frame_equal(want_frame, frame.loc[labels])
1001+
1002+
@pytest.mark.parametrize(
1003+
"idx,labels,msg",
1004+
[
1005+
(
1006+
period_range(start="2000", periods=20, freq="D"),
1007+
["2000-01-04", "2000-01-30"],
1008+
r"with any missing labels",
1009+
),
1010+
(
1011+
date_range(start="2000", periods=20, freq="D"),
1012+
["2000-01-04", "2000-01-30"],
1013+
r"with any missing labels",
1014+
),
1015+
(
1016+
pd.timedelta_range(start="1 day", periods=20),
1017+
["3 day", "30 day"],
1018+
r"with any missing labels",
1019+
),
1020+
],
1021+
)
1022+
def test_loc_with_list_of_strings_representing_datetimes_missing_value(
1023+
self, idx, labels, msg
1024+
):
1025+
# GH 11278
1026+
s = Series(range(20), index=idx)
1027+
df = DataFrame(range(20), index=idx)
1028+
1029+
with pytest.raises(KeyError, match=msg):
1030+
s.loc[labels]
1031+
with pytest.raises(KeyError, match=msg):
1032+
s[labels]
1033+
with pytest.raises(KeyError, match=msg):
1034+
df.loc[labels]
1035+
9591036

9601037
def test_series_loc_getitem_label_list_missing_values():
9611038
# gh-11428

0 commit comments

Comments
 (0)