Skip to content

Commit fc5554b

Browse files
committed
BUG: loc misbehaves when Period is at start of 3-level MultiIndex
(pandas-dev#20684) When a `PeriodIndex` is a level of a `MultiIndex`, `.loc` now raises an exception for a mismatched key; `PeriodEngine.get_loc` only accepts `Period.ordinal`.
1 parent 96bf661 commit fc5554b

File tree

6 files changed

+102
-26
lines changed

6 files changed

+102
-26
lines changed

doc/source/whatsnew/v1.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,8 @@ Missing
173173

174174
MultiIndex
175175
^^^^^^^^^^
176-
177176
-
177+
- Bug in `.loc` with a `PeriodIndex` as a level in a `MultiIndex` where an exception was not raised when the key did not match (:issue:`20684`)
178178
-
179179

180180
I/O

pandas/_libs/index.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ from pandas._libs.hashtable cimport HashTable
2424
from pandas._libs import algos, hashtable as _hash
2525
from pandas._libs.tslibs import Timestamp, Timedelta, period as periodlib
2626
from pandas._libs.missing import checknull
27+
from pandas._libs.tslibs.util cimport is_period_object
2728

2829
cdef int64_t NPY_NAT = util.get_nat()
2930

@@ -479,7 +480,7 @@ cdef class TimedeltaEngine(DatetimeEngine):
479480
cdef class PeriodEngine(Int64Engine):
480481

481482
cdef _get_index_values(self):
482-
return super(PeriodEngine, self).vgetter()
483+
return super(PeriodEngine, self).vgetter().view('i8')
483484

484485
cpdef _call_map_locations(self, values):
485486
super(PeriodEngine, self)._call_map_locations(values.view('i8'))

pandas/_libs/tslibs/parsing.pyx

+2-4
Original file line numberDiff line numberDiff line change
@@ -251,10 +251,8 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
251251
-------
252252
datetime, datetime/dateutil.parser._result, str
253253
"""
254-
if not isinstance(arg, (str, unicode)):
255-
# Note: cython recognizes `unicode` in both py2/py3, optimizes
256-
# this check into a C call.
257-
return arg
254+
if not isinstance(arg, str):
255+
raise TypeError
258256

259257
if getattr(freq, "_typ", None) == "dateoffset":
260258
freq = freq.rule_code

pandas/core/indexes/period.py

+21-20
Original file line numberDiff line numberDiff line change
@@ -702,36 +702,37 @@ def get_loc(self, key, method=None, tolerance=None):
702702
-------
703703
loc : int
704704
"""
705+
if is_integer(key):
706+
ordinal = key
707+
else:
708+
key = self._cast_period_object(key)
709+
ordinal = iNaT if key is NaT else key.ordinal
710+
711+
if tolerance is not None:
712+
tolerance = self._convert_tolerance(tolerance, np.asarray(key))
713+
705714
try:
706-
return self._engine.get_loc(key)
715+
return self._int64index.get_loc(ordinal, method, tolerance)
707716
except KeyError:
708-
if is_integer(key):
709-
raise
717+
raise KeyError(key)
710718

719+
def _cast_period_object(self, key):
720+
if isinstance(key, str):
711721
try:
712722
asdt, parsed, reso = parse_time_string(key, self.freq)
713723
key = asdt
714-
except TypeError:
715-
pass
716724
except DateParseError:
717725
# A string with invalid format
718726
raise KeyError("Cannot interpret '{}' as period".format(key))
719727

720-
try:
721-
key = Period(key, freq=self.freq)
722-
except ValueError:
723-
# we cannot construct the Period
724-
# as we have an invalid type
725-
raise KeyError(key)
726-
727-
try:
728-
ordinal = iNaT if key is NaT else key.ordinal
729-
if tolerance is not None:
730-
tolerance = self._convert_tolerance(tolerance, np.asarray(key))
731-
return self._int64index.get_loc(ordinal, method, tolerance)
732-
733-
except KeyError:
734-
raise KeyError(key)
728+
try:
729+
return Period(key, freq=self.freq)
730+
except TypeError:
731+
raise TypeError("'{key}' is an invalid key".format(key=key))
732+
except ValueError:
733+
# we cannot construct the Period
734+
# as we have an invalid type
735+
raise KeyError(key)
735736

736737
def _maybe_cast_slice_bound(self, label, side, kind):
737738
"""

pandas/tests/indexes/period/test_period.py

+66
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,72 @@ def test_insert(self):
617617
result = period_range("2017Q1", periods=4, freq="Q").insert(1, na)
618618
tm.assert_index_equal(result, expected)
619619

620+
def test_contains_raise_error_if_period_index_is_in_multi_index(self):
621+
# issue 20684
622+
df = DataFrame(
623+
{
624+
"A": [Period("2019"), "x1"],
625+
"B": ["y1", Period("2018")],
626+
"C": ["z1", "z2"],
627+
"V1": [1, 2],
628+
"V2": [10, 20],
629+
}
630+
).set_index(["A", "B", "C"])
631+
msg = r"Period\('2019', 'A-DEC'\), 'foo', 'bar'"
632+
with pytest.raises(KeyError, match=msg):
633+
df.loc[(Period("2019"), "foo", "bar")]
634+
635+
msg = r"'foo', Period\('2018', 'A-DEC'\), 'bar'"
636+
with pytest.raises(KeyError, match=msg):
637+
df.loc[("foo", Period("2018"), "bar")]
638+
639+
pbc = DataFrame(
640+
{"p": [Period("2017")], "b": ["b1"], "c": ["c1"], "v": [1]}
641+
).set_index(["p", "b", "c"])
642+
msg = r"Period\('2017', 'A-DEC'\), 'wibble', 'c1'"
643+
with pytest.raises(KeyError, match=msg):
644+
pbc.loc[(Period("2017"), "wibble", "c1")]
645+
646+
msg = r"Period\('2017', 'A-DEC'\), 'b1', 'wobble'"
647+
with pytest.raises(KeyError, match=msg):
648+
pbc.loc[(Period("2017"), "b1", "wobble"), "v"]
649+
650+
ybc = DataFrame({"y": ["2017"], "b": ["b1"], "c": ["c1"], "v": [1]}).set_index(
651+
["y", "b", "c"]
652+
)
653+
assert ybc.loc[("2017", "b1", "c1"), "v"] == 1
654+
655+
msg = r"'2017', 'b1', 'wobble'"
656+
with pytest.raises(KeyError, match=msg):
657+
ybc.loc[("2017", "b1", "wobble"), "v"]
658+
659+
pb = DataFrame({"p": [Period("2017")], "b": ["b1"], "v": [1]}).set_index(
660+
["p", "b"]
661+
)
662+
assert pb.loc[(Period("2017"), "b1"), "v"] == 1
663+
664+
msg = r"Period\('2017', 'A-DEC'\), 'wibble'"
665+
with pytest.raises(KeyError, match=msg):
666+
pb.loc[(Period("2017"), "wibble"), "v"]
667+
668+
bcp = pd.DataFrame(
669+
{"p": [Period("2017")], "b": ["b1"], "c": ["c1"], "v": [1]}
670+
).set_index(["b", "c", "p"])
671+
assert bcp.loc[("b1", "c1", Period("2017")), "v"] == 1
672+
673+
msg = r"'b1', 'wibble', Period\('2017', 'A-DEC'\)"
674+
with pytest.raises(KeyError, match=msg):
675+
bcp.loc[("b1", "wibble", Period("2017")), "v"]
676+
677+
bpc = pd.DataFrame(
678+
{"p": [Period("2017")], "b": ["b1"], "c": ["c1"], "v": [1]}
679+
).set_index(["b", "p", "c"])
680+
assert bpc.loc[("b1", Period("2017"), "c1"), "v"] == 1
681+
682+
msg = r"'b1', Period\('2017', 'A-DEC'\), 'wibble'"
683+
with pytest.raises(KeyError, match=msg):
684+
bpc.loc[("b1", Period("2017"), "wibble"), "v"]
685+
620686

621687
def test_maybe_convert_timedelta():
622688
pi = PeriodIndex(["2000", "2001"], freq="D")

pandas/tests/tslibs/test_parsing.py

+10
Original file line numberDiff line numberDiff line change
@@ -209,3 +209,13 @@ def test_try_parse_dates():
209209

210210
expected = np.array([parse(d, dayfirst=True) for d in arr])
211211
tm.assert_numpy_array_equal(result, expected)
212+
213+
214+
def test_parse_time_string_check_instance_type_raise_exception():
215+
# issue 20684
216+
with pytest.raises(TypeError):
217+
parse_time_string((1, 2, 3))
218+
219+
result = parse_time_string("2019")
220+
expected = (datetime(2019, 1, 1), datetime(2019, 1, 1), "year")
221+
assert result == expected

0 commit comments

Comments
 (0)