From cc08ddbfb934a28444ef5927979e17da2669e53f Mon Sep 17 00:00:00 2001 From: sinhrks Date: Mon, 5 May 2014 18:58:41 +0900 Subject: [PATCH] ENH/BUG: partial string indexing with PeriodIndex --- doc/source/release.rst | 1 + doc/source/timeseries.rst | 39 ++++++++++++- doc/source/v0.14.0.txt | 9 +++ pandas/tseries/period.py | 25 ++++++-- pandas/tseries/tests/test_period.py | 91 +++++++++++++++++++++++++++++ 5 files changed, 157 insertions(+), 8 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index 4886850b928b1..b12f4eca010d9 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -498,6 +498,7 @@ Bug Fixes - Bug in ``Dataframe.set_index``, ``reindex`` and ``pivot`` don't preserve ``DatetimeIndex`` and ``PeriodIndex`` attributes (:issue:`3950`, :issue:`5878`, :issue:`6631`) - Bug in ``MultiIndex.get_level_values`` doesn't preserve ``DatetimeIndex`` and ``PeriodIndex`` attributes (:issue:`7092`) - Bug in ``Groupby`` doesn't preserve ``tz`` (:issue:`3950`) +- Bug in ``PeriodIndex`` partial string slicing (:issue:`6716`) pandas 0.13.1 ------------- diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 1cae66fada587..65796d95fed0a 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -299,8 +299,10 @@ intelligent functionality like selection, slicing, etc. ts[:5].index ts[::2].index -Partial String Indexing -~~~~~~~~~~~~~~~~~~~~~~~ +.. _timeseries.partialindexing: + +DatetimeIndex Partial String Indexing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You can pass in dates and strings that parse to dates as indexing parameters: @@ -1092,7 +1094,38 @@ objects: .. ipython:: python - Series(randn(len(prng)), prng) + ps = Series(randn(len(prng)), prng) + ps + +PeriodIndex Partial String Indexing +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +You can pass in dates and strings to `Series` and `DataFrame` with `PeriodIndex`, in the same manner as `DatetimeIndex`. 
For details, refer to :ref:`DatetimeIndex Partial String Indexing <timeseries.partialindexing>`. + +.. ipython:: python + + ps['2011-01'] + + ps[datetime(2011, 12, 25):] + + ps['10/31/2011':'12/31/2011'] + +Passing a string representing a lower frequency than the `PeriodIndex` returns partially sliced data. + +.. ipython:: python + + ps['2011'] + + dfp = DataFrame(randn(600,1), columns=['A'], + index=period_range('2013-01-01 9:00', periods=600, freq='T')) + dfp + dfp['2013-01-01 10H'] + +As with `DatetimeIndex`, the endpoints will be included in the result. The example below slices data from 10:00 to 11:59. + +.. ipython:: python + + dfp['2013-01-01 10H':'2013-01-01 11H'] Frequency Conversion and Resampling with PeriodIndex ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.14.0.txt b/doc/source/v0.14.0.txt index 49551c5bd3550..910c166e22ec5 100644 --- a/doc/source/v0.14.0.txt +++ b/doc/source/v0.14.0.txt @@ -591,6 +591,15 @@ Enhancements - str.wrap implemented (:issue:`6999`) +- `PeriodIndex` fully supports partial string indexing like `DatetimeIndex` (:issue:`7043`) + + .. ipython:: python + + prng = period_range('2013-01-01 09:00', periods=100, freq='H') + ps = Series(np.random.randn(len(prng)), index=prng) + ps + ps['2013-01-02'] + .. 
_whatsnew_0140.performance: Performance diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 113be28f86976..b70a7dafa28bd 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -908,13 +908,13 @@ def get_value(self, series, key): pos = np.searchsorted(self.values, [ord1, ord2]) key = slice(pos[0], pos[1] + 1) return series[key] - else: + elif grp == freqn: key = Period(asdt, freq=self.freq).ordinal return _maybe_box(self, self._engine.get_value(s, key), series, key) + else: + raise KeyError(key) except TypeError: pass - except KeyError: - pass key = Period(key, self.freq).ordinal return _maybe_box(self, self._engine.get_value(s, key), series, key) @@ -978,8 +978,10 @@ def _get_string_slice(self, key): raise ValueError('Partial indexing only valid for ' 'ordered time series') - asdt, parsed, reso = parse_time_string(key, self.freq) - key = asdt + key, parsed, reso = parse_time_string(key, self.freq) + + grp = _freq_mod._infer_period_group(reso) + freqn = _freq_mod._period_group(self.freq) if reso == 'year': t1 = Period(year=parsed.year, freq='A') @@ -988,6 +990,19 @@ def _get_string_slice(self, key): elif reso == 'quarter': q = (parsed.month - 1) // 3 + 1 t1 = Period(year=parsed.year, quarter=q, freq='Q-DEC') + elif reso == 'day' and grp < freqn: + t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day, + freq='D') + elif reso == 'hour' and grp < freqn: + t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day, + hour=parsed.hour, freq='H') + elif reso == 'minute' and grp < freqn: + t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day, + hour=parsed.hour, minute=parsed.minute, freq='T') + elif reso == 'second' and grp < freqn: + t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day, + hour=parsed.hour, minute=parsed.minute, second=parsed.second, + freq='S') else: raise KeyError(key) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 
4117ca660db35..419ab48a01a07 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1851,6 +1851,97 @@ def test_pindex_slice_index(self): exp = s[12:24] assert_series_equal(res, exp) + def test_getitem_day(self): + # GH 6716 + # Confirm DatetimeIndex and PeriodIndex works identically + didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) + pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) + + for idx in [didx, pidx]: + # getitem against index should raise ValueError + values = ['2014', '2013/02', '2013/01/02', + '2013/02/01 9H', '2013/02/01 09:00'] + for v in values: + with tm.assertRaises(ValueError): + idx[v] + + s = Series(np.random.rand(len(idx)), index=idx) + assert_series_equal(s['2013/01'], s[0:31]) + assert_series_equal(s['2013/02'], s[31:59]) + assert_series_equal(s['2014'], s[365:]) + + invalid = ['2013/02/01 9H', '2013/02/01 09:00'] + for v in invalid: + with tm.assertRaises(KeyError): + s[v] + + def test_range_slice_day(self): + # GH 6716 + didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400) + pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) + + for idx in [didx, pidx]: + # slices against index should raise IndexError + values = ['2014', '2013/02', '2013/01/02', + '2013/02/01 9H', '2013/02/01 09:00'] + for v in values: + with tm.assertRaises(IndexError): + idx[v:] + + s = Series(np.random.rand(len(idx)), index=idx) + + assert_series_equal(s['2013/01/02':], s[1:]) + assert_series_equal(s['2013/01/02':'2013/01/05'], s[1:5]) + assert_series_equal(s['2013/02':], s[31:]) + assert_series_equal(s['2014':], s[365:]) + + invalid = ['2013/02/01 9H', '2013/02/01 09:00'] + for v in invalid: + with tm.assertRaises(IndexError): + idx[v:] + + def test_getitem_seconds(self): + # GH 6716 + didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) + pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) + + for idx in [didx, pidx]: + # getitem against 
index should raise ValueError + values = ['2014', '2013/02', '2013/01/02', + '2013/02/01 9H', '2013/02/01 09:00'] + for v in values: + with tm.assertRaises(ValueError): + idx[v] + + s = Series(np.random.rand(len(idx)), index=idx) + + assert_series_equal(s['2013/01/01 10:00'], s[3600:3660]) + assert_series_equal(s['2013/01/01 9H'], s[:3600]) + for d in ['2013/01/01', '2013/01', '2013']: + assert_series_equal(s[d], s) + + def test_range_slice_seconds(self): + # GH 6716 + didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) + pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) + + for idx in [didx, pidx]: + # slices against index should raise IndexError + values = ['2014', '2013/02', '2013/01/02', + '2013/02/01 9H', '2013/02/01 09:00'] + for v in values: + with tm.assertRaises(IndexError): + idx[v:] + + s = Series(np.random.rand(len(idx)), index=idx) + + assert_series_equal(s['2013/01/01 09:05':'2013/01/01 09:10'], s[300:660]) + assert_series_equal(s['2013/01/01 10:00':'2013/01/01 10:05'], s[3600:3960]) + assert_series_equal(s['2013/01/01 10H':], s[3600:]) + assert_series_equal(s[:'2013/01/01 09:30'], s[:1860]) + for d in ['2013/01/01', '2013/01', '2013']: + assert_series_equal(s[d:], s) + def test_pindex_qaccess(self): pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') s = Series(np.random.rand(len(pi)), index=pi).cumsum()