Skip to content

Commit 91f70fc

Browse files
committed
BUG: List indexer on PeriodIndex doesn't coerce strings (pandas-dev#30515)
1 parent ac3056f commit 91f70fc

File tree

5 files changed: +99 -4 lines changed

doc/source/whatsnew/v1.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,8 @@ Indexing
160160
- Bug in :meth:`PeriodIndex.is_monotonic` incorrectly returning ``True`` when containing leading ``NaT`` entries (:issue:`31437`)
161161
- Bug in :meth:`DatetimeIndex.get_loc` raising ``KeyError`` with converted-integer key instead of the user-passed key (:issue:`31425`)
162162
- Bug in :meth:`Series.xs` incorrectly returning ``Timestamp`` instead of ``datetime64`` in some object-dtype cases (:issue:`31630`)
163+
- Bug in indexing with a list of strings representing datetimes failing on :class:`PeriodIndex` or :class:`DatetimeIndex` (:issue:`11278`)
164+
-
163165

164166
Missing
165167
^^^^^^^

pandas/core/common.py

+4
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,10 @@ def asarray_tuplesafe(values, dtype=None):
225225
if isinstance(values, list) and dtype in [np.object_, object]:
226226
return construct_1d_object_array_from_listlike(values)
227227

228+
if isinstance(values, list) and hasattr(values, "__array__"):
229+
# avoid converting extension array to numpy array
230+
return values
231+
228232
result = np.asarray(values, dtype=dtype)
229233

230234
if issubclass(result.dtype.type, str):

pandas/core/indexes/base.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
is_signed_integer_dtype,
5050
is_timedelta64_dtype,
5151
is_unsigned_integer_dtype,
52+
needs_i8_conversion,
5253
)
5354
from pandas.core.dtypes.concat import concat_compat
5455
from pandas.core.dtypes.generic import (
@@ -71,6 +72,7 @@
7172
from pandas.core.arrays import ExtensionArray
7273
from pandas.core.base import IndexOpsMixin, PandasObject
7374
import pandas.core.common as com
75+
import pandas.core.construction as constrn
7476
from pandas.core.indexers import deprecate_ndim_indexing
7577
from pandas.core.indexes.frozen import FrozenList
7678
import pandas.core.missing as missing
@@ -3238,7 +3240,15 @@ def _convert_arr_indexer(self, keyarr):
32383240
-------
32393241
converted_keyarr : array-like
32403242
"""
3241-
keyarr = com.asarray_tuplesafe(keyarr)
3243+
3244+
if (
3245+
isinstance(keyarr, list)
3246+
and all(isinstance(k, str) for k in keyarr)
3247+
and needs_i8_conversion(self.dtype)
3248+
):
3249+
keyarr = constrn.array(keyarr, self.dtype)
3250+
else:
3251+
keyarr = com.asarray_tuplesafe(keyarr)
32423252
return keyarr
32433253

32443254
def _convert_index_indexer(self, keyarr):

pandas/core/indexing.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1273,11 +1273,13 @@ def _get_listlike_indexer(self, key, axis: int, raise_missing: bool = False):
12731273
indexer, keyarr = ax._convert_listlike_indexer(key)
12741274
# We only act on all found values:
12751275
if indexer is not None and (indexer != -1).all():
1276-
self._validate_read_indexer(key, indexer, axis, raise_missing=raise_missing)
1276+
self._validate_read_indexer(
1277+
keyarr, indexer, axis, raise_missing=raise_missing
1278+
)
12771279
return ax[indexer], indexer
12781280

12791281
if ax.is_unique and not getattr(ax, "is_overlapping", False):
1280-
indexer = ax.get_indexer_for(key)
1282+
indexer = ax.get_indexer_for(keyarr)
12811283
keyarr = ax.reindex(keyarr)[0]
12821284
else:
12831285
keyarr, indexer, new_indexer = ax._reindex_non_unique(keyarr)

pandas/tests/indexing/test_loc.py

+78-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
import pandas as pd
9-
from pandas import DataFrame, Series, Timestamp, date_range
9+
from pandas import DataFrame, Period, Series, Timestamp, date_range, period_range
1010
import pandas._testing as tm
1111
from pandas.api.types import is_scalar
1212
from pandas.tests.indexing.common import Base
@@ -897,6 +897,83 @@ def test_loc_reverse_assignment(self):
897897

898898
tm.assert_series_equal(result, expected)
899899

900+
@pytest.mark.parametrize(
901+
"idx,labels,expected_idx",
902+
[
903+
(
904+
period_range(start="2000", periods=20, freq="D"),
905+
["2000-01-04", "2000-01-08", "2000-01-12"],
906+
[
907+
Period("2000-01-04", freq="D"),
908+
Period("2000-01-08", freq="D"),
909+
Period("2000-01-12", freq="D"),
910+
],
911+
),
912+
(
913+
date_range(start="2000", periods=20, freq="D"),
914+
["2000-01-04", "2000-01-08", "2000-01-12"],
915+
[
916+
Timestamp("2000-01-04", freq="D"),
917+
Timestamp("2000-01-08", freq="D"),
918+
Timestamp("2000-01-12", freq="D"),
919+
],
920+
),
921+
(
922+
pd.timedelta_range(start="1 day", periods=20),
923+
["4D", "8D", "12D"],
924+
[pd.Timedelta("4 day"), pd.Timedelta("8 day"), pd.Timedelta("12 day")],
925+
),
926+
],
927+
)
928+
def test_loc_with_list_of_strings_representing_datetimes(
929+
self, idx, labels, expected_idx
930+
):
931+
# GH 11278
932+
s = Series(range(20), index=idx)
933+
df = DataFrame(range(20), index=idx)
934+
935+
expected_value = [3, 7, 11]
936+
expected_s = Series(expected_value, expected_idx)
937+
expected_df = DataFrame(expected_value, expected_idx)
938+
939+
tm.assert_series_equal(expected_s, s.loc[labels])
940+
tm.assert_series_equal(expected_s, s[labels])
941+
tm.assert_frame_equal(expected_df, df.loc[labels])
942+
943+
@pytest.mark.parametrize(
944+
"idx,labels,msg",
945+
[
946+
(
947+
period_range(start="2000", periods=20, freq="D"),
948+
["2000-01-04", "2000-01-30"],
949+
r"with any missing labels",
950+
),
951+
(
952+
date_range(start="2000", periods=20, freq="D"),
953+
["2000-01-04", "2000-01-30"],
954+
r"with any missing labels",
955+
),
956+
(
957+
pd.timedelta_range(start="1 day", periods=20),
958+
["3 day", "30 day"],
959+
r"with any missing labels",
960+
),
961+
],
962+
)
963+
def test_loc_with_list_of_strings_representing_datetimes_missing_value(
964+
self, idx, labels, msg
965+
):
966+
# GH 11278
967+
s = Series(range(20), index=idx)
968+
df = DataFrame(range(20), index=idx)
969+
970+
with pytest.raises(KeyError, match=msg):
971+
s.loc[labels]
972+
with pytest.raises(KeyError, match=msg):
973+
s[labels]
974+
with pytest.raises(KeyError, match=msg):
975+
df.loc[labels]
976+
900977

901978
def test_series_loc_getitem_label_list_missing_values():
902979
# gh-11428

0 commit comments

Comments
 (0)