From 9f4735371f5c51edcacdb33656aa3117c366db9b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 20 Jan 2020 18:35:29 -0800 Subject: [PATCH 1/4] BUG: Inconsistency between PeriodIndex.get_value vs get_loc --- pandas/core/indexes/period.py | 31 ++++++++++++++++---- pandas/tests/indexes/period/test_indexing.py | 9 +++++- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b3386f6104032..b09dfea7ec3a2 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -567,12 +567,33 @@ def get_loc(self, key, method=None, tolerance=None): """ if isinstance(key, str): + try: - asdt, reso = parse_time_string(key, self.freq) - key = asdt - except DateParseError: - # A string with invalid format - raise KeyError(f"Cannot interpret '{key}' as period") + loc = self._get_string_slice(key) + return loc + except (TypeError, ValueError): + + try: + asdt, reso = parse_time_string(key, self.freq) + except DateParseError: + # A string with invalid format + raise KeyError(f"Cannot interpret '{key}' as period") + + grp = resolution.Resolution.get_freq_group(reso) + freqn = resolution.get_freq_group(self.freq) + + # _get_string_slice will handle cases where grp < freqn + assert grp >= freqn + + if grp == freqn: + key = Period(asdt, freq=self.freq) + loc = self.get_loc(key, method=method, tolerance=tolerance) + # TODO: or better just to let fall through? + return loc + elif method is None: + raise KeyError(key) + else: + key = asdt elif is_integer(key): # Period constructor will cast to string, which we dont want diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 1e3160980e8bb..f93e71ac151e4 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -518,12 +518,19 @@ def test_contains(self): ps0 = [p0, p1, p2] idx0 = pd.PeriodIndex(ps0) + ser = pd.Series(range(6, 9), index=idx0) for p in ps0: assert p in idx0 assert str(p) in idx0 - assert "2017-09-01 00:00:01" in idx0 + # Higher-resolution period-like are _not_ considered as contained + key = "2017-09-01 00:00:01" + assert key not in idx0 + with pytest.raises(KeyError, match=key): + idx0.get_loc(key) + with pytest.raises(KeyError, match=key): + idx0.get_value(ser, key) assert "2017-09" in idx0 From 10e39391e8e8294aaf91e8f15e79e74708f3ce7f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 20 Jan 2020 18:37:38 -0800 Subject: [PATCH 2/4] remove comment --- pandas/core/indexes/period.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index b09dfea7ec3a2..1eea2579abfdc 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -588,7 +588,6 @@ def get_loc(self, key, method=None, tolerance=None): if grp == freqn: key = Period(asdt, freq=self.freq) loc = self.get_loc(key, method=method, tolerance=tolerance) - # TODO: or better just to let fall through? return loc elif method is None: raise KeyError(key) From cbf235ceefa5cb06ff9db1a0882cf18d8473aed4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 20 Jan 2020 18:46:17 -0800 Subject: [PATCH 3/4] GH refs --- doc/source/whatsnew/v1.1.0.rst | 2 +- pandas/tests/indexes/period/test_indexing.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index a04ba157ce0ae..dbe6dbb147b7f 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -115,7 +115,7 @@ Interval Indexing ^^^^^^^^ - Bug in slicing on a :class:`DatetimeIndex` with a partial-timestamp dropping high-resolution indices near the end of a year, quarter, or month (:issue:`31064`) -- +- Bug in :meth:`PeriodIndex.get_loc` treating higher-resolution strings differently from :meth:`PeriodIndex.get_value` (:issue:`31172`) - Missing diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index f93e71ac151e4..4c1438915ab33 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -524,6 +524,7 @@ def test_contains(self): assert p in idx0 assert str(p) in idx0 + # GH#31172 # Higher-resolution period-like are _not_ considered as contained key = "2017-09-01 00:00:01" assert key not in idx0 From 512b274c8022ef75e7baf6824d9984f54e3f57e2 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 24 Jan 2020 17:12:27 -0800 Subject: [PATCH 4/4] update per comments --- pandas/core/indexes/period.py | 43 ++++++++++++++++++----------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 5e2b4b9db7a76..fe6c1ba808f9a 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -486,7 +486,7 @@ def get_value(self, series, key): try: loc = self._get_string_slice(key) return series[loc] - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): pass asdt, reso = parse_time_string(key, self.freq) @@ -572,27 +572,28 @@ def get_loc(self, key, method=None, tolerance=None): loc = self._get_string_slice(key) return loc except (TypeError, ValueError): + pass + + try: + asdt, reso = parse_time_string(key, self.freq) + except DateParseError: + # A string with invalid format + raise KeyError(f"Cannot interpret '{key}' as period") - try: - asdt, reso = parse_time_string(key, self.freq) - except DateParseError: - # A string with invalid format - raise KeyError(f"Cannot interpret '{key}' as period") - - grp = resolution.Resolution.get_freq_group(reso) - freqn = resolution.get_freq_group(self.freq) - - # _get_string_slice will handle cases where grp < freqn - assert grp >= freqn - - if grp == freqn: - key = Period(asdt, freq=self.freq) - loc = self.get_loc(key, method=method, tolerance=tolerance) - return loc - elif method is None: - raise KeyError(key) - else: - key = asdt + grp = resolution.Resolution.get_freq_group(reso) + freqn = resolution.get_freq_group(self.freq) + + # _get_string_slice will handle cases where grp < freqn + assert grp >= freqn + + if grp == freqn: + key = Period(asdt, freq=self.freq) + loc = self.get_loc(key, method=method, tolerance=tolerance) + return loc + elif method is None: + raise KeyError(key) + else: + key = asdt elif is_integer(key): # Period constructor will cast to string, which we dont want