Skip to content

Commit 3efb005

Browse files
committed
BUG: List indexer on PeriodIndex doesn't coerce strings (pandas-dev#30515)
1 parent c6c5367 commit 3efb005

File tree

4 files changed

+140
-3
lines changed

4 files changed

+140
-3
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ Indexing
474474
- Bug in :meth:`DataFrame.copy` where ``_item_cache`` was not invalidated after the copy, causing post-copy value updates to not be reflected (:issue:`31784`)
475475
- Bug in `Series.__getitem__` with an integer key and a :class:`MultiIndex` with leading integer level failing to raise ``KeyError`` if the key is not present in the first level (:issue:`33355`)
476476
- Bug in :meth:`DataFrame.iloc` when slicing a single-column :class:`DataFrame` with ``ExtensionDtype`` (e.g. ``df.iloc[:, :1]``) returning an invalid result (:issue:`32957`)
477+
- Bug in indexing with a list of strings representing datetimes failing on :class:`DatetimeIndex` or :class:`PeriodIndex` (:issue:`11278`)
477478

478479
Missing
479480
^^^^^^^

pandas/core/indexes/datetimelike.py

+19
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
3333
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
3434
from pandas.core.base import IndexOpsMixin
35+
import pandas.core.common as com
36+
from pandas.core.construction import array as pd_array, extract_array
3537
import pandas.core.indexes.base as ibase
3638
from pandas.core.indexes.base import Index, _index_shared_docs, ensure_index
3739
from pandas.core.indexes.extension import (
@@ -41,6 +43,7 @@
4143
)
4244
from pandas.core.indexes.numeric import Int64Index
4345
from pandas.core.ops import get_op_result_name
46+
from pandas.core.tools.datetimes import DateParseError
4447
from pandas.core.tools.timedeltas import to_timedelta
4548

4649
from pandas.tseries.frequencies import DateOffset
@@ -602,6 +605,22 @@ def delete(self, loc):
602605
arr = type(self._data)._simple_new(new_i8s, dtype=self.dtype, freq=freq)
603606
return type(self)._simple_new(arr, name=self.name)
604607

608+
@doc(Index._convert_arr_indexer)
def _convert_arr_indexer(self, keyarr):
    # NOTE: the docstring is inherited from ``Index._convert_arr_indexer``
    # through ``@doc``; keep the documentation here as comments so the
    # inherited text is not altered.
    #
    # Purpose: turn a list-like indexer into an array.  When every element
    # is a string we (weakly) assume the strings represent datetimes,
    # timedeltas or periods and try to coerce them to this index's dtype so
    # that label lookups compare like with like.
    if lib.infer_dtype(keyarr) == "string":
        try:
            extension_arr = pd_array(keyarr, self.dtype)
        except (ValueError, DateParseError):
            # The strings cannot be interpreted as ``self.dtype``.  Fall
            # through to the generic conversion below so the caller always
            # receives an array rather than the raw input object.
            pass
        else:
            return extract_array(extension_arr, extract_numpy=True)

    # Non-string keys, or strings that could not be coerced.
    return com.asarray_tuplesafe(keyarr)
623+
605624

606625
class DatetimeTimedeltaMixin(DatetimeIndexOpsMixin, Int64Index):
607626
"""

pandas/core/indexing.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1257,11 +1257,13 @@ def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False):
12571257
indexer, keyarr = ax._convert_listlike_indexer(key)
12581258
# We only act on all found values:
12591259
if indexer is not None and (indexer != -1).all():
1260-
self._validate_read_indexer(key, indexer, axis, raise_missing=raise_missing)
1260+
self._validate_read_indexer(
1261+
keyarr, indexer, axis, raise_missing=raise_missing
1262+
)
12611263
return ax[indexer], indexer
12621264

12631265
if ax.is_unique and not getattr(ax, "is_overlapping", False):
1264-
indexer = ax.get_indexer_for(key)
1266+
indexer = ax.get_indexer_for(keyarr)
12651267
keyarr = ax.reindex(keyarr)[0]
12661268
else:
12671269
keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)

pandas/tests/indexing/test_partial.py

+116-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import pytest
99

1010
import pandas as pd
11-
from pandas import DataFrame, Index, Series, date_range
11+
from pandas import DataFrame, Index, Period, Series, Timestamp, date_range, period_range
1212
import pandas._testing as tm
1313

1414

@@ -525,3 +525,118 @@ def test_partial_set_empty_frame_empty_consistencies(self):
525525
df.loc[0, "x"] = 1
526526
expected = DataFrame(dict(x=[1], y=[np.nan]))
527527
tm.assert_frame_equal(df, expected, check_dtype=False)
528+
529+
@pytest.mark.parametrize(
    "idx,labels,expected_idx",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Period(day, freq="D")
                for day in ("2000-01-04", "2000-01-08", "2000-01-12")
            ],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Timestamp(day, freq="D")
                for day in ("2000-01-04", "2000-01-08", "2000-01-12")
            ],
        ),
        (
            pd.timedelta_range(start="1 day", periods=20),
            ["4D", "8D", "12D"],
            [pd.Timedelta(span) for span in ("4 day", "8 day", "12 day")],
        ),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes(
    self, idx, labels, expected_idx
):
    """
    A list of string labels selects the matching rows of a Series/DataFrame
    indexed by a period, datetime or timedelta range.
    """
    # GH 11278
    ser = Series(range(20), index=idx)
    frame = DataFrame(range(20), index=idx)

    # The labels address the 4th, 8th and 12th entries of the 20-element
    # index, whose values are 3, 7 and 11.
    picked_values = [3, 7, 11]
    expected_ser = Series(picked_values, index=expected_idx)
    expected_frame = DataFrame(picked_values, index=expected_idx)

    tm.assert_series_equal(expected_ser, ser.loc[labels])
    tm.assert_series_equal(expected_ser, ser[labels])
    tm.assert_frame_equal(expected_frame, frame.loc[labels])
571+
572+
@pytest.mark.parametrize(
    "idx,labels",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
        ),
        (pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes_missing_value(
    self, idx, labels
):
    """
    A list of string labels where one label lies outside the index must
    raise ``KeyError`` for Series ``.loc``, Series ``[]`` and DataFrame
    ``.loc``.
    """
    # GH 11278
    ser = Series(range(20), index=idx)
    frame = DataFrame(range(20), index=idx)
    msg = r"with any missing labels"

    # Each lookup is deferred so it only runs inside ``pytest.raises``.
    lookups = (
        lambda: ser.loc[labels],
        lambda: ser[labels],
        lambda: frame.loc[labels],
    )
    for lookup in lookups:
        with pytest.raises(KeyError, match=msg):
            lookup()
600+
601+
@pytest.mark.parametrize(
    "idx,labels,msg",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["4D", "8D"],
            (
                r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\]"
                r" are in the \[index\]"
            ),
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["4D", "8D"],
            (
                r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\]"
                r" are in the \[index\]"
            ),
        ),
        (
            pd.timedelta_range(start="1 day", periods=20),
            ["2000-01-04", "2000-01-08"],
            (
                r"None of \[Index\(\['2000-01-04', '2000-01-08'\],"
                r" dtype='object'\)\] are in the \[index\]"
            ),
        ),
    ],
)
def test_loc_with_list_of_strings_representing_datetimes_not_matched_type(
    self, idx, labels, msg
):
    """
    String labels of the wrong kind for the index (timedelta-like strings
    on a period/datetime index, date strings on a timedelta index) must
    raise ``KeyError`` reporting the labels as an object-dtype Index.
    """
    # GH 11278
    ser = Series(range(20), index=idx)
    frame = DataFrame(range(20), index=idx)

    # Each lookup is deferred so it only runs inside ``pytest.raises``.
    lookups = (
        lambda: ser.loc[labels],
        lambda: ser[labels],
        lambda: frame.loc[labels],
    )
    for lookup in lookups:
        with pytest.raises(KeyError, match=msg):
            lookup()

0 commit comments

Comments
 (0)