Skip to content

Commit 9f008f5

Browse files
committed
List indexer on PeriodIndex doesn't coerce strings (pandas-dev#11278)
1 parent cde73af commit 9f008f5

File tree

6 files changed

+140
-4
lines changed

6 files changed

+140
-4
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -778,6 +778,7 @@ Indexing
778778
- Bug when indexing with ``.loc`` where the index was a :class:`CategoricalIndex` with non-string categories didn't work (:issue:`17569`, :issue:`30225`)
779779
- :meth:`Index.get_indexer_non_unique` could fail with `TypeError` in some cases, such as when searching for ints in a string index (:issue:`28257`)
780780
- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`)
781+
- Bug where indexing with a list of datetime strings could fail on a :class:`PeriodIndex` or :class:`DatetimeIndex` (:issue:`11278`)
781782

782783
Missing
783784
^^^^^^^

pandas/core/common.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,17 @@
1515

1616
from pandas._libs import lib, tslibs
1717

18-
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
18+
from pandas.core.dtypes.cast import (
19+
construct_1d_datetimelike_array_from_listlike,
20+
construct_1d_object_array_from_listlike,
21+
)
1922
from pandas.core.dtypes.common import (
2023
is_array_like,
2124
is_bool_dtype,
25+
is_datetime64_any_dtype,
2226
is_extension_array_dtype,
2327
is_integer,
28+
is_period_dtype,
2429
)
2530
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
2631
from pandas.core.dtypes.inference import _iterable_not_string
@@ -224,6 +229,11 @@ def asarray_tuplesafe(values, dtype=None):
224229
if isinstance(values, list) and dtype in [np.object_, object]:
225230
return construct_1d_object_array_from_listlike(values)
226231

232+
if isinstance(values, list) and (
233+
is_datetime64_any_dtype(dtype) or is_period_dtype(dtype)
234+
):
235+
return construct_1d_datetimelike_array_from_listlike(values, dtype)
236+
227237
result = np.asarray(values, dtype=dtype)
228238

229239
if issubclass(result.dtype.type, str):

pandas/core/dtypes/cast.py

+36
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
is_bool_dtype,
2424
is_complex,
2525
is_complex_dtype,
26+
is_datetime64_any_dtype,
2627
is_datetime64_dtype,
2728
is_datetime64_ns_dtype,
2829
is_datetime64tz_dtype,
@@ -34,6 +35,7 @@
3435
is_integer,
3536
is_integer_dtype,
3637
is_object_dtype,
38+
is_period_dtype,
3739
is_scalar,
3840
is_string_dtype,
3941
is_timedelta64_dtype,
@@ -1505,3 +1507,37 @@ def maybe_cast_to_integer_array(arr, dtype, copy: bool = False):
15051507

15061508
if is_integer_dtype(dtype) and (is_float_dtype(arr) or is_object_dtype(arr)):
15071509
raise ValueError("Trying to coerce float values to integers")
1510+
1511+
1512+
def construct_1d_datetimelike_array_from_listlike(values, dtype):
    """
    Transform a list-like object into a 1-dimensional datetime-like array.

    Parameters
    ----------
    values : any list-like object
        The values to convert.
    dtype : period dtype or datetime64 dtype
        Target dtype; it selects which conversion is applied.

    Returns
    -------
    array-like
        A ``PeriodArray`` when `dtype` is a period dtype, otherwise the
        result of ``maybe_cast_to_datetime(values, dtype=dtype)``.

    Raises
    ------
    TypeError
        If `values` is not list-like, or if `dtype` is neither a period
        dtype nor a datetime64 dtype.
    """
    if not is_list_like(values):
        raise TypeError(f"{values} not list-like")

    if is_period_dtype(dtype):
        # Kept as a function-local import, as in the original
        # (presumably to avoid a circular import -- confirm).
        from pandas.core.arrays import PeriodArray

        return PeriodArray._from_sequence(values, dtype=dtype)

    if is_datetime64_any_dtype(dtype):
        return maybe_cast_to_datetime(values, dtype=dtype)

    # `values` is known to be list-like here; the unsupported input is the
    # dtype, so report that instead of dumping the whole list into the message.
    raise TypeError(f"{dtype} is not a period or datetime64 dtype")

pandas/core/indexes/base.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,8 @@
6666
from pandas.core.accessor import CachedAccessor
6767
import pandas.core.algorithms as algos
6868
from pandas.core.arrays import ExtensionArray
69+
from pandas.core.arrays.datetimes import DatetimeArray
70+
from pandas.core.arrays.period import PeriodArray
6971
from pandas.core.base import IndexOpsMixin, PandasObject
7072
import pandas.core.common as com
7173
from pandas.core.construction import extract_array
@@ -3058,7 +3060,13 @@ def _convert_listlike_indexer(self, keyarr, kind=None):
30583060

30593061
@Appender(_index_shared_docs["_convert_arr_indexer"])
30603062
def _convert_arr_indexer(self, keyarr):
3061-
keyarr = com.asarray_tuplesafe(keyarr)
3063+
if isinstance(keyarr, list) and (
3064+
is_datetime64_any_dtype(self.dtype)
3065+
or is_period_dtype(self.dtype)
3066+
):
3067+
keyarr = com.asarray_tuplesafe(keyarr, self.dtype)
3068+
else:
3069+
keyarr = com.asarray_tuplesafe(keyarr)
30623070
return keyarr
30633071

30643072
_index_shared_docs[
@@ -3122,6 +3130,17 @@ def _convert_list_indexer(self, keyarr, kind=None):
31223130
keyarr = np.where(keyarr < 0, len(self) + keyarr, keyarr)
31233131
return keyarr
31243132

3133+
elif (
3134+
isinstance(keyarr, PeriodArray) or
3135+
isinstance(keyarr, DatetimeArray)
3136+
):
3137+
indexer = self.get_indexer_for(keyarr)
3138+
if (indexer >= 0).all():
3139+
return indexer
3140+
3141+
indexer[indexer < 0] = len(self)
3142+
return maybe_convert_indices(indexer, len(self))
3143+
31253144
return None
31263145

31273146
def _invalid_indexer(self, form, key):

pandas/core/indexing.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from pandas._libs.indexing import _NDFrameIndexerBase
66
from pandas._libs.lib import item_from_zerodim
7+
from pandas._libs.tslibs import Period, Timestamp
78
from pandas.errors import AbstractMethodError
89
from pandas.util._decorators import Appender
910

@@ -18,7 +19,13 @@
1819
is_sparse,
1920
)
2021
from pandas.core.dtypes.concat import concat_compat
21-
from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries
22+
from pandas.core.dtypes.generic import (
23+
ABCDataFrame,
24+
ABCDatetimeIndex,
25+
ABCMultiIndex,
26+
ABCPeriodIndex,
27+
ABCSeries,
28+
)
2229
from pandas.core.dtypes.missing import _infer_fill_value, isna
2330

2431
import pandas.core.common as com

pandas/tests/indexing/test_loc.py

+64-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
import pandas as pd
9-
from pandas import DataFrame, Series, Timestamp, date_range
9+
from pandas import DataFrame, Period, Series, Timestamp, date_range, period_range
1010
from pandas.api.types import is_scalar
1111
from pandas.tests.indexing.common import Base
1212
import pandas.util.testing as tm
@@ -939,6 +939,69 @@ def test_loc_reverse_assignment(self):
939939

940940
tm.assert_series_equal(result, expected)
941941

942+
@pytest.mark.parametrize(
    "idx,labels,expected_idx",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Period("2000-01-04", freq="D"),
                Period("2000-01-08", freq="D"),
                Period("2000-01-12", freq="D"),
            ],
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-08", "2000-01-12"],
            [
                Timestamp("2000-01-04", freq="D"),
                Timestamp("2000-01-08", freq="D"),
                Timestamp("2000-01-12", freq="D"),
            ],
        ),
    ],
)
def test_loc_with_datetime_string_list(self, idx, labels, expected_idx):
    # GH 11278
    # Selecting with a list of date strings on a PeriodIndex / DatetimeIndex
    # should return the rows at those dates, labelled with Period / Timestamp.
    s = Series(range(20), index=idx)
    df = DataFrame(range(20), index=idx)

    # The index starts at 2000-01-01 with daily frequency and the values are
    # 0..19, so the 4th, 8th and 12th of January hold 3, 7 and 11.
    expected_value = [3, 7, 11]
    expected_s = Series(expected_value, expected_idx)
    expected_df = DataFrame(expected_value, expected_idx)

    # Pass (result, expected) so failure output labels the sides correctly.
    tm.assert_series_equal(s.loc[labels], expected_s)
    tm.assert_series_equal(s[labels], expected_s)
    tm.assert_frame_equal(df.loc[labels], expected_df)
976+
tm.assert_frame_equal(expected_df, df.loc[labels])
977+
978+
@pytest.mark.parametrize(
    "idx,labels,msg",
    [
        (
            period_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
            "indices are out-of-bounds",
        ),
        (
            date_range(start="2000", periods=20, freq="D"),
            ["2000-01-04", "2000-01-30"],
            "indices are out-of-bounds",
        ),
    ],
)
def test_loc_with_datetime_string_and_missing_value(self, idx, labels, msg):
    # GH 11278
    # "2000-01-30" falls outside the 20-day index, so every list-based
    # lookup below is expected to raise IndexError.
    ser = Series(range(20), index=idx)
    frame = DataFrame(range(20), index=idx)

    lookups = (
        lambda: ser.loc[labels],
        lambda: ser[labels],
        lambda: frame.loc[labels],
    )
    for lookup in lookups:
        with pytest.raises(IndexError, match=msg):
            lookup()
1004+
9421005

9431006
def test_series_loc_getitem_label_list_missing_values():
9441007
# gh-11428

0 commit comments

Comments
 (0)