Skip to content

Commit 5e86611

Browse files
committed
List indexer on PeriodIndex doesn't coerce strings (pandas-dev#11278)
1 parent cde73af commit 5e86611

File tree

5 files changed

+125
-3
lines changed

5 files changed

+125
-3
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,7 @@ Interval
769769
Indexing
770770
^^^^^^^^
771771

772+
- Bug where indexing a :class:`PeriodIndex` or :class:`DatetimeIndex` with a list of datetime strings could fail (:issue:`11278`)
772773
- Bug in assignment using a reverse slicer (:issue:`26939`)
773774
- Bug in :meth:`DataFrame.explode` would duplicate frame in the presence of duplicates in the index (:issue:`28010`)
774775
- Bug in reindexing a :meth:`PeriodIndex` with another type of index that contained a `Period` (:issue:`28323`) (:issue:`28337`)

pandas/core/common.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,17 @@
1515

1616
from pandas._libs import lib, tslibs
1717

18-
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
18+
from pandas.core.dtypes.cast import (
19+
construct_1d_datetimelike_array_from_listlike,
20+
construct_1d_object_array_from_listlike,
21+
)
1922
from pandas.core.dtypes.common import (
2023
is_array_like,
2124
is_bool_dtype,
25+
is_datetime64_any_dtype,
2226
is_extension_array_dtype,
2327
is_integer,
28+
is_period_dtype,
2429
)
2530
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
2631
from pandas.core.dtypes.inference import _iterable_not_string
@@ -224,6 +229,11 @@ def asarray_tuplesafe(values, dtype=None):
224229
if isinstance(values, list) and dtype in [np.object_, object]:
225230
return construct_1d_object_array_from_listlike(values)
226231

232+
if isinstance(values, list) and (
233+
is_datetime64_any_dtype(dtype) or is_period_dtype(dtype)
234+
):
235+
return construct_1d_datetimelike_array_from_listlike(values, dtype)
236+
227237
result = np.asarray(values, dtype=dtype)
228238

229239
if issubclass(result.dtype.type, str):

pandas/core/dtypes/cast.py

+36
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
is_bool_dtype,
2424
is_complex,
2525
is_complex_dtype,
26+
is_datetime64_any_dtype,
2627
is_datetime64_dtype,
2728
is_datetime64_ns_dtype,
2829
is_datetime64tz_dtype,
@@ -34,6 +35,7 @@
3435
is_integer,
3536
is_integer_dtype,
3637
is_object_dtype,
38+
is_period_dtype,
3739
is_scalar,
3840
is_string_dtype,
3941
is_timedelta64_dtype,
@@ -1505,3 +1507,37 @@ def maybe_cast_to_integer_array(arr, dtype, copy: bool = False):
15051507

15061508
if is_integer_dtype(dtype) and (is_float_dtype(arr) or is_object_dtype(arr)):
15071509
raise ValueError("Trying to coerce float values to integers")
1510+
1511+
1512+
def construct_1d_datetimelike_array_from_listlike(values, dtype):
    """
    Convert a list-like into a 1-dimensional datetime-like array.

    The conversion used is chosen from `dtype`: period dtypes go through
    ``PeriodArray._from_sequence`` and datetime64 dtypes go through
    ``maybe_cast_to_datetime``.

    Parameters
    ----------
    values : any list-like object
        The elements to convert (for example a list of date strings).
    dtype : period dtype or datetime64 dtype
        The target dtype, as recognised by ``is_period_dtype`` or
        ``is_datetime64_any_dtype``.

    Returns
    -------
    array-like
        The result of ``PeriodArray._from_sequence(values, dtype=dtype)`` for
        a period dtype, otherwise the result of
        ``maybe_cast_to_datetime(values, dtype=dtype)``.

    Raises
    ------
    TypeError
        If `values` is not list-like, or if `dtype` is neither a period
        dtype nor a datetime64 dtype.
    """
    if not is_list_like(values):
        raise TypeError(f"{values} not list-like")

    if is_period_dtype(dtype):
        # NOTE(review): function-level import kept from the original;
        # presumably it avoids a circular import -- confirm before hoisting.
        from pandas.core.arrays import PeriodArray

        return PeriodArray._from_sequence(values, dtype=dtype)

    if is_datetime64_any_dtype(dtype):
        return maybe_cast_to_datetime(values, dtype=dtype)

    # `values` is known to be list-like here, so the unsupported dtype is the
    # actual problem; report that rather than blaming the values.
    raise TypeError(f"{dtype} is not a period or datetime64 dtype")

pandas/core/indexes/base.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@
6666
from pandas.core.accessor import CachedAccessor
6767
import pandas.core.algorithms as algos
6868
from pandas.core.arrays import ExtensionArray
69+
from pandas.core.arrays.datetimes import DatetimeArray
70+
from pandas.core.arrays.period import PeriodArray
6971
from pandas.core.base import IndexOpsMixin, PandasObject
7072
import pandas.core.common as com
7173
from pandas.core.construction import extract_array
@@ -3058,7 +3060,12 @@ def _convert_listlike_indexer(self, keyarr, kind=None):
30583060

30593061
@Appender(_index_shared_docs["_convert_arr_indexer"])
30603062
def _convert_arr_indexer(self, keyarr):
3061-
keyarr = com.asarray_tuplesafe(keyarr)
3063+
if isinstance(keyarr, list) and (
3064+
is_datetime64_any_dtype(self.dtype) or is_period_dtype(self.dtype)
3065+
):
3066+
keyarr = com.asarray_tuplesafe(keyarr, self.dtype)
3067+
else:
3068+
keyarr = com.asarray_tuplesafe(keyarr)
30623069
return keyarr
30633070

30643071
_index_shared_docs[
@@ -3122,6 +3129,9 @@ def _convert_list_indexer(self, keyarr, kind=None):
31223129
keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr)
31233130
return keyarr
31243131

3132+
elif isinstance(keyarr, PeriodArray) or isinstance(keyarr, DatetimeArray):
3133+
return self.get_indexer_for(keyarr)
3134+
31253135
return None
31263136

31273137
def _invalid_indexer(self, form, key):

pandas/tests/indexing/test_loc.py

+66-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
import pandas as pd
9-
from pandas import DataFrame, Series, Timestamp, date_range
9+
from pandas import DataFrame, Period, Series, Timestamp, date_range, period_range
1010
from pandas.api.types import is_scalar
1111
from pandas.tests.indexing.common import Base
1212
import pandas.util.testing as tm
@@ -939,6 +939,71 @@ def test_loc_reverse_assignment(self):
939939

940940
tm.assert_series_equal(result, expected)
941941

942+
@pytest.mark.parametrize(
    "idx,labels,expected_idx",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Period(day, freq="D")
                for day in ("2000-01-04", "2000-01-08", "2000-01-12")
            ],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Timestamp(day, freq="D")
                for day in ("2000-01-04", "2000-01-08", "2000-01-12")
            ],
        ),
    ],
)
def test_loc_with_datetime_string_list(self, idx, labels, expected_idx):
    """Selecting with a list of date strings returns the matching rows."""
    # GH 11278
    ser = Series(range(20), index=idx)
    frame = DataFrame(range(20), index=idx)

    # Positions 3, 7 and 11 of the 20-element daily index are the requested
    # labels, and the data is range(20), so those are the values too.
    picked = [3, 7, 11]
    want_ser = Series(picked, index=expected_idx)
    want_frame = DataFrame(picked, index=expected_idx)

    # Both ``.loc[...]`` and plain ``[...]`` must give the same Series.
    for indexable in (ser.loc, ser):
        tm.assert_series_equal(want_ser, indexable[labels])

    tm.assert_frame_equal(want_frame, frame.loc[labels])
978+
@pytest.mark.parametrize(
    "idx,labels,msg",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
            r"None of \[PeriodIndex\(\['2000-01-04', '2000-01-30'\], "
            r"dtype='period\[D\]', freq='D'\)\] are in the \[index\]",
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
            r"None of \[DatetimeIndex\(\['2000-01-04', '2000-01-30'\], "
            r"dtype='datetime64\[ns\]', freq=None\)\] are in the \[index\]",
        ),
    ],
)
def test_loc_with_datetime_string_and_missing_value(self, idx, labels, msg):
    """A list of date strings with a label outside the index raises KeyError."""
    # GH 11278
    ser = Series(range(20), index=idx)
    frame = DataFrame(range(20), index=idx)

    # The index spans 20 days from 2000-01-01, so "2000-01-30" is absent;
    # every access path is expected to raise with the same message.
    for indexable in (ser.loc, ser, frame.loc):
        with pytest.raises(KeyError, match=msg):
            indexable[labels]
1006+
9421007

9431008
def test_series_loc_getitem_label_list_missing_values():
9441009
# gh-11428

0 commit comments

Comments
 (0)