Skip to content

Commit b7456b9

Browse files
committed
BUG: loc misbehaves when Period is at start of 3-level MultiIndex
(pandas-dev#20684) If a `PeriodIndex` is a level of a `MultiIndex`, `.loc` would raise an exception with a mismatched key. `PeriodEngine.get_loc` can't accept a Period.
1 parent 96bf661 commit b7456b9

File tree

5 files changed

+116
-6
lines changed

5 files changed

+116
-6
lines changed

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,8 @@ Missing
173173

174174
MultiIndex
175175
^^^^^^^^^^
176-
177176
-
177+
- Bug in ``.loc`` on a ``MultiIndex`` with a ``PeriodIndex`` as one of its levels, where a ``KeyError`` was not raised when the key did not match (:issue:`20684`)
178178
-
179179

180180
I/O

pandas/_libs/index.pyx

+37-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ from pandas._libs.hashtable cimport HashTable
2424
from pandas._libs import algos, hashtable as _hash
2525
from pandas._libs.tslibs import Timestamp, Timedelta, period as periodlib
2626
from pandas._libs.missing import checknull
27+
from pandas._libs.tslibs.util cimport is_period_object
2728

2829
cdef int64_t NPY_NAT = util.get_nat()
2930

@@ -479,7 +480,7 @@ cdef class TimedeltaEngine(DatetimeEngine):
479480
cdef class PeriodEngine(Int64Engine):
480481

481482
cdef _get_index_values(self):
482-
return super(PeriodEngine, self).vgetter()
483+
return super(PeriodEngine, self).vgetter().view('i8')
483484

484485
cpdef _call_map_locations(self, values):
485486
super(PeriodEngine, self)._call_map_locations(values.view('i8'))
@@ -518,6 +519,41 @@ cdef class PeriodEngine(Int64Engine):
518519

519520
return super(PeriodEngine, self).get_indexer_non_unique(ordinal_array)
520521

522+
cpdef get_loc(self, object key):
523+
if is_definitely_invalid_key(key):
524+
raise TypeError("'{val}' is an invalid key".format(val=key))
525+
526+
key = key.ordinal if is_period_object(key) else key
527+
528+
if self.over_size_threshold and self.is_monotonic_increasing:
529+
if not self.is_unique:
530+
return self._get_loc_duplicates(key)
531+
532+
values = self._get_index_values()
533+
self._check_type(key)
534+
535+
try:
536+
loc = values.searchsorted(key, side='left')
537+
except TypeError:
538+
raise KeyError(key)
539+
540+
if loc == len(values) or util.get_value_at(values, loc) != key:
541+
raise KeyError(key)
542+
return loc
543+
544+
self._ensure_mapping_populated()
545+
if not self.unique:
546+
return self._get_loc_duplicates(key)
547+
548+
self._check_type(key)
549+
try:
550+
return self.mapping.get_item(key)
551+
except KeyError:
552+
raise KeyError(key)
553+
554+
cdef _check_type(self, object key):
555+
if not util.is_integer_object(key):
556+
raise KeyError(key)
521557

522558
cpdef convert_scalar(ndarray arr, object value):
523559
# we don't turn integers

pandas/_libs/tslibs/parsing.pyx

+2-4
Original file line numberDiff line numberDiff line change
@@ -251,10 +251,8 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
251251
-------
252252
datetime, datetime/dateutil.parser._result, str
253253
"""
254-
if not isinstance(arg, (str, unicode)):
255-
# Note: cython recognizes `unicode` in both py2/py3, optimizes
256-
# this check into a C call.
257-
return arg
254+
if not isinstance(arg, str):
255+
raise TypeError
258256

259257
if getattr(freq, "_typ", None) == "dateoffset":
260258
freq = freq.rule_code

pandas/tests/indexes/period/test_period.py

+66
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,72 @@ def test_insert(self):
617617
result = period_range("2017Q1", periods=4, freq="Q").insert(1, na)
618618
tm.assert_index_equal(result, expected)
619619

620+
def test_contains_raise_error_if_period_index_is_in_multi_index(self):
621+
# issue 20684
622+
df = DataFrame(
623+
{
624+
"A": [Period("2019"), "x1"],
625+
"B": ["y1", Period("2018")],
626+
"C": ["z1", "z2"],
627+
"V1": [1, 2],
628+
"V2": [10, 20],
629+
}
630+
).set_index(["A", "B", "C"])
631+
msg = r"Period\('2019', 'A-DEC'\), 'foo', 'bar'"
632+
with pytest.raises(KeyError, match=msg):
633+
df.loc[(Period("2019"), "foo", "bar")]
634+
635+
msg = r"'foo', Period\('2018', 'A-DEC'\), 'bar'"
636+
with pytest.raises(KeyError, match=msg):
637+
df.loc[("foo", Period("2018"), "bar")]
638+
639+
pbc = DataFrame(
640+
{"p": [Period("2017")], "b": ["b1"], "c": ["c1"], "v": [1]}
641+
).set_index(["p", "b", "c"])
642+
msg = r"Period\('2017', 'A-DEC'\), 'wibble', 'c1'"
643+
with pytest.raises(KeyError, match=msg):
644+
pbc.loc[(Period("2017"), "wibble", "c1")]
645+
646+
msg = r"Period\('2017', 'A-DEC'\), 'b1', 'wobble'"
647+
with pytest.raises(KeyError, match=msg):
648+
pbc.loc[(Period("2017"), "b1", "wobble"), "v"]
649+
650+
ybc = DataFrame({"y": ["2017"], "b": ["b1"], "c": ["c1"], "v": [1]}).set_index(
651+
["y", "b", "c"]
652+
)
653+
assert ybc.loc[("2017", "b1", "c1"), "v"] == 1
654+
655+
msg = r"'2017', 'b1', 'wobble'"
656+
with pytest.raises(KeyError, match=msg):
657+
ybc.loc[("2017", "b1", "wobble"), "v"]
658+
659+
pb = DataFrame({"p": [Period("2017")], "b": ["b1"], "v": [1]}).set_index(
660+
["p", "b"]
661+
)
662+
assert pb.loc[(Period("2017"), "b1"), "v"] == 1
663+
664+
msg = r"Period\('2017', 'A-DEC'\), 'wibble'"
665+
with pytest.raises(KeyError, match=msg):
666+
pb.loc[(Period("2017"), "wibble"), "v"]
667+
668+
bcp = pd.DataFrame(
669+
{"p": [Period("2017")], "b": ["b1"], "c": ["c1"], "v": [1]}
670+
).set_index(["b", "c", "p"])
671+
assert bcp.loc[("b1", "c1", Period("2017")), "v"] == 1
672+
673+
msg = r"'b1', 'wibble', Period\('2017', 'A-DEC'\)"
674+
with pytest.raises(KeyError, match=msg):
675+
bcp.loc[("b1", "wibble", Period("2017")), "v"]
676+
677+
bpc = pd.DataFrame(
678+
{"p": [Period("2017")], "b": ["b1"], "c": ["c1"], "v": [1]}
679+
).set_index(["b", "p", "c"])
680+
assert bpc.loc[("b1", Period("2017"), "c1"), "v"] == 1
681+
682+
msg = r"'b1', Period\('2017', 'A-DEC'\), 'wibble'"
683+
with pytest.raises(KeyError, match=msg):
684+
bpc.loc[("b1", Period("2017"), "wibble"), "v"]
685+
620686

621687
def test_maybe_convert_timedelta():
622688
pi = PeriodIndex(["2000", "2001"], freq="D")

pandas/tests/tslibs/test_parsing.py

+10
Original file line numberDiff line numberDiff line change
@@ -209,3 +209,13 @@ def test_try_parse_dates():
209209

210210
expected = np.array([parse(d, dayfirst=True) for d in arr])
211211
tm.assert_numpy_array_equal(result, expected)
212+
213+
214+
def test_parse_time_string_check_instance_type_raise_exception():
215+
# issue 20684
216+
with pytest.raises(TypeError):
217+
parse_time_string((1, 2, 3))
218+
219+
result = parse_time_string("2019")
220+
expected = (datetime(2019, 1, 1), datetime(2019, 1, 1), "year")
221+
assert result == expected

0 commit comments

Comments
 (0)