Skip to content

Commit b138f94

Browse files
committed
BUG: List indexer on PeriodIndex doesn't coerce strings (pandas-dev#30515)
1 parent c6c5367 commit b138f94

File tree

5 files changed

+97
-4
lines changed

5 files changed

+97
-4
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ Indexing
474474
- Bug in :meth:`DataFrame.copy` where ``_item_cache`` was not invalidated after the copy, causing post-copy value updates to not be reflected (:issue:`31784`)
475475
- Bug in `Series.__getitem__` with an integer key and a :class:`MultiIndex` with leading integer level failing to raise ``KeyError`` if the key is not present in the first level (:issue:`33355`)
476476
- Bug in :meth:`DataFrame.iloc` when slicing a single-column :class:`DataFrame` with ``ExtensionDtype`` (e.g. ``df.iloc[:, :1]``) returning an invalid result (:issue:`32957`)
477+
- Bug in indexing with a list of strings representing datetimes failing on :class:`DatetimeIndex` or :class:`PeriodIndex` (:issue:`11278`)
477478

478479
Missing
479480
^^^^^^^

pandas/core/indexes/datetimelike.py

+13
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
3333
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
3434
from pandas.core.base import IndexOpsMixin
35+
import pandas.core.common as com
36+
from pandas.core.construction import array as pd_array, extract_array
3537
import pandas.core.indexes.base as ibase
3638
from pandas.core.indexes.base import Index, _index_shared_docs, ensure_index
3739
from pandas.core.indexes.extension import (
@@ -602,6 +604,17 @@ def delete(self, loc):
602604
arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq)
603605
return type(self)._simple_new(arr, name=self.name)
604606

607+
@doc(Index._convert_arr_indexer)
608+
def _convert_arr_indexer(self, keyarr):
609+
if lib.infer_dtype(keyarr) == "string":
610+
# Weak reasoning that indexer is a list of strings
611+
# representing datetime or timedelta or period
612+
extension_arr = pd_array(keyarr, self.dtype)
613+
converted_arr = extract_array(extension_arr, extract_numpy=True)
614+
else:
615+
converted_arr = com.asarray_tuplesafe(keyarr)
616+
return converted_arr
617+
605618

606619
class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index):
607620
"""

pandas/core/indexing.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1257,11 +1257,13 @@ def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False):
12571257
indexer, keyarr = ax._convert_listlike_indexer(key)
12581258
# We only act on all found values:
12591259
if indexer is not None and (indexer != -1).all():
1260-
self._validate_read_indexer(key, indexer, axis, raise_missing=raise_missing)
1260+
self._validate_read_indexer(
1261+
keyarr, indexer, axis, raise_missing=raise_missing
1262+
)
12611263
return ax[indexer], indexer
12621264

12631265
if ax.is_unique and not getattr(ax, "is_overlapping", False):
1264-
indexer = ax.get_indexer_for(key)
1266+
indexer = ax.get_indexer_for(keyarr)
12651267
keyarr = ax.reindex(keyarr)[0]
12661268
else:
12671269
keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)

pandas/tests/indexing/test_loc.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
import pandas as pd
9-
from pandas import DataFrame, Series, Timestamp, date_range
9+
from pandas import DataFrame, Period, Series, Timestamp, date_range, period_range
1010
import pandas._testing as tm
1111
from pandas.api.types import is_scalar
1212
from pandas.tests.indexing.common import Base

pandas/tests/indexing/test_partial.py

+78-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import pytest
99

1010
import pandas as pd
11-
from pandas import DataFrame, Index, Series, date_range
11+
from pandas import DataFrame, Index, Period, Series, Timestamp, date_range, period_range
1212
import pandas._testing as tm
1313

1414

@@ -525,3 +525,80 @@ def test_partial_set_empty_frame_empty_consistencies(self):
525525
df.loc[0, "x"] = 1
526526
expected = DataFrame(dict(x=[1], y=[np.nan]))
527527
tm.assert_frame_equal(df, expected, check_dtype=False)
528+
529+
@pytest.mark.parametrize(
530+
"idx,labels,expected_idx",
531+
[
532+
(
533+
period_range(start="2000", periods=20, freq="D"),
534+
["2000-01-04", "2000-01-08", "2000-01-12"],
535+
[
536+
Period("2000-01-04", freq="D"),
537+
Period("2000-01-08", freq="D"),
538+
Period("2000-01-12", freq="D"),
539+
],
540+
),
541+
(
542+
date_range(start="2000", periods=20, freq="D"),
543+
["2000-01-04", "2000-01-08", "2000-01-12"],
544+
[
545+
Timestamp("2000-01-04", freq="D"),
546+
Timestamp("2000-01-08", freq="D"),
547+
Timestamp("2000-01-12", freq="D"),
548+
],
549+
),
550+
(
551+
pd.timedelta_range(start="1 day", periods=20),
552+
["4D", "8D", "12D"],
553+
[pd.Timedelta("4 day"), pd.Timedelta("8 day"), pd.Timedelta("12 day")],
554+
),
555+
],
556+
)
557+
def test_loc_with_list_of_strings_representing_datetimes(
558+
self, idx, labels, expected_idx
559+
):
560+
# GH 11278
561+
s = Series(range(20), index=idx)
562+
df = DataFrame(range(20), index=idx)
563+
564+
expected_value = [3, 7, 11]
565+
expected_s = Series(expected_value, expected_idx)
566+
expected_df = DataFrame(expected_value, expected_idx)
567+
568+
tm.assert_series_equal(expected_s, s.loc[labels])
569+
tm.assert_series_equal(expected_s, s[labels])
570+
tm.assert_frame_equal(expected_df, df.loc[labels])
571+
572+
@pytest.mark.parametrize(
573+
"idx,labels,msg",
574+
[
575+
(
576+
period_range(start="2000", periods=20, freq="D"),
577+
["2000-01-04", "2000-01-30"],
578+
r"with any missing labels",
579+
),
580+
(
581+
date_range(start="2000", periods=20, freq="D"),
582+
["2000-01-04", "2000-01-30"],
583+
r"with any missing labels",
584+
),
585+
(
586+
pd.timedelta_range(start="1 day", periods=20),
587+
["3 day", "30 day"],
588+
r"with any missing labels",
589+
),
590+
],
591+
)
592+
def test_loc_with_list_of_strings_representing_datetimes_missing_value(
593+
self, idx, labels, msg
594+
):
595+
# GH 11278
596+
s = Series(range(20), index=idx)
597+
df = DataFrame(range(20), index=idx)
598+
599+
with pytest.raises(KeyError, match=msg):
600+
s.loc[labels]
601+
with pytest.raises(KeyError, match=msg):
602+
s[labels]
603+
with pytest.raises(KeyError, match=msg):
604+
df.loc[labels]

0 commit comments

Comments
 (0)