Skip to content

Commit 3e4d003

Browse files
committed
Merge pull request #7043 from sinhrks/pslice
ENH/BUG: partial string indexing with PeriodIndex
2 parents c9df3d4 + cc08ddb commit 3e4d003

File tree

5 files changed

+157
-8
lines changed

5 files changed

+157
-8
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,7 @@ Bug Fixes
498498
- Bug in ``DataFrame.set_index``, ``reindex`` and ``pivot`` don't preserve ``DatetimeIndex`` and ``PeriodIndex`` attributes (:issue:`3950`, :issue:`5878`, :issue:`6631`)
499499
- Bug in ``MultiIndex.get_level_values`` doesn't preserve ``DatetimeIndex`` and ``PeriodIndex`` attributes (:issue:`7092`)
500500
- Bug in ``Groupby`` doesn't preserve ``tz`` (:issue:`3950`)
501+
- Bug in ``PeriodIndex`` partial string slicing (:issue:`6716`)
501502

502503
pandas 0.13.1
503504
-------------

doc/source/timeseries.rst

+36-3
Original file line numberDiff line numberDiff line change
@@ -299,8 +299,10 @@ intelligent functionality like selection, slicing, etc.
299299
ts[:5].index
300300
ts[::2].index
301301
302-
Partial String Indexing
303-
~~~~~~~~~~~~~~~~~~~~~~~
302+
.. _timeseries.partialindexing:
303+
304+
DatetimeIndex Partial String Indexing
305+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
304306

305307
You can pass in dates and strings that parse to dates as indexing parameters:
306308

@@ -1092,7 +1094,38 @@ objects:
10921094

10931095
.. ipython:: python
10941096
1095-
Series(randn(len(prng)), prng)
1097+
ps = Series(randn(len(prng)), prng)
1098+
ps
1099+
1100+
PeriodIndex Partial String Indexing
1101+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1102+
1103+
You can pass in dates and strings to `Series` and `DataFrame` with `PeriodIndex`, in the same manner as `DatetimeIndex`. For details, refer to :ref:`DatetimeIndex Partial String Indexing <timeseries.partialindexing>`.
1104+
1105+
.. ipython:: python
1106+
1107+
ps['2011-01']
1108+
1109+
ps[datetime(2011, 12, 25):]
1110+
1111+
ps['10/31/2011':'12/31/2011']
1112+
1113+
Passing a string that represents a lower frequency than that of the `PeriodIndex` returns partially sliced data.
1114+
1115+
.. ipython:: python
1116+
1117+
ps['2011']
1118+
1119+
dfp = DataFrame(randn(600,1), columns=['A'],
1120+
index=period_range('2013-01-01 9:00', periods=600, freq='T'))
1121+
dfp
1122+
dfp['2013-01-01 10H']
1123+
1124+
As with `DatetimeIndex`, the endpoints will be included in the result. The example below slices data from 10:00 to 11:59.
1125+
1126+
.. ipython:: python
1127+
1128+
dfp['2013-01-01 10H':'2013-01-01 11H']
10961129
10971130
Frequency Conversion and Resampling with PeriodIndex
10981131
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

doc/source/v0.14.0.txt

+9
Original file line numberDiff line numberDiff line change
@@ -591,6 +591,15 @@ Enhancements
591591

592592
- str.wrap implemented (:issue:`6999`)
593593

594+
- `PeriodIndex` fully supports partial string indexing like `DatetimeIndex` (:issue:`7043`)
595+
596+
.. ipython:: python
597+
598+
prng = period_range('2013-01-01 09:00', periods=100, freq='H')
599+
ps = Series(np.random.randn(len(prng)), index=prng)
600+
ps
601+
ps['2013-01-02']
602+
594603
.. _whatsnew_0140.performance:
595604

596605
Performance

pandas/tseries/period.py

+20-5
Original file line numberDiff line numberDiff line change
@@ -908,13 +908,13 @@ def get_value(self, series, key):
908908
pos = np.searchsorted(self.values, [ord1, ord2])
909909
key = slice(pos[0], pos[1] + 1)
910910
return series[key]
911-
else:
911+
elif grp == freqn:
912912
key = Period(asdt, freq=self.freq).ordinal
913913
return _maybe_box(self, self._engine.get_value(s, key), series, key)
914+
else:
915+
raise KeyError(key)
914916
except TypeError:
915917
pass
916-
except KeyError:
917-
pass
918918

919919
key = Period(key, self.freq).ordinal
920920
return _maybe_box(self, self._engine.get_value(s, key), series, key)
@@ -978,8 +978,10 @@ def _get_string_slice(self, key):
978978
raise ValueError('Partial indexing only valid for '
979979
'ordered time series')
980980

981-
asdt, parsed, reso = parse_time_string(key, self.freq)
982-
key = asdt
981+
key, parsed, reso = parse_time_string(key, self.freq)
982+
983+
grp = _freq_mod._infer_period_group(reso)
984+
freqn = _freq_mod._period_group(self.freq)
983985

984986
if reso == 'year':
985987
t1 = Period(year=parsed.year, freq='A')
@@ -988,6 +990,19 @@ def _get_string_slice(self, key):
988990
elif reso == 'quarter':
989991
q = (parsed.month - 1) // 3 + 1
990992
t1 = Period(year=parsed.year, quarter=q, freq='Q-DEC')
993+
elif reso == 'day' and grp < freqn:
994+
t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day,
995+
freq='D')
996+
elif reso == 'hour' and grp < freqn:
997+
t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day,
998+
hour=parsed.hour, freq='H')
999+
elif reso == 'minute' and grp < freqn:
1000+
t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day,
1001+
hour=parsed.hour, minute=parsed.minute, freq='T')
1002+
elif reso == 'second' and grp < freqn:
1003+
t1 = Period(year=parsed.year, month=parsed.month, day=parsed.day,
1004+
hour=parsed.hour, minute=parsed.minute, second=parsed.second,
1005+
freq='S')
9911006
else:
9921007
raise KeyError(key)
9931008

pandas/tseries/tests/test_period.py

+91
Original file line numberDiff line numberDiff line change
@@ -1851,6 +1851,97 @@ def test_pindex_slice_index(self):
18511851
exp = s[12:24]
18521852
assert_series_equal(res, exp)
18531853

1854+
def test_getitem_day(self):
1855+
# GH 6716
1856+
# Confirm DatetimeIndex and PeriodIndex work identically
1857+
didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400)
1858+
pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400)
1859+
1860+
for idx in [didx, pidx]:
1861+
# getitem against index should raise ValueError
1862+
values = ['2014', '2013/02', '2013/01/02',
1863+
'2013/02/01 9H', '2013/02/01 09:00']
1864+
for v in values:
1865+
with tm.assertRaises(ValueError):
1866+
idx[v]
1867+
1868+
s = Series(np.random.rand(len(idx)), index=idx)
1869+
assert_series_equal(s['2013/01'], s[0:31])
1870+
assert_series_equal(s['2013/02'], s[31:59])
1871+
assert_series_equal(s['2014'], s[365:])
1872+
1873+
invalid = ['2013/02/01 9H', '2013/02/01 09:00']
1874+
for v in invalid:
1875+
with tm.assertRaises(KeyError):
1876+
s[v]
1877+
1878+
def test_range_slice_day(self):
1879+
# GH 6716
1880+
didx = DatetimeIndex(start='2013/01/01', freq='D', periods=400)
1881+
pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400)
1882+
1883+
for idx in [didx, pidx]:
1884+
# slices against index should raise IndexError
1885+
values = ['2014', '2013/02', '2013/01/02',
1886+
'2013/02/01 9H', '2013/02/01 09:00']
1887+
for v in values:
1888+
with tm.assertRaises(IndexError):
1889+
idx[v:]
1890+
1891+
s = Series(np.random.rand(len(idx)), index=idx)
1892+
1893+
assert_series_equal(s['2013/01/02':], s[1:])
1894+
assert_series_equal(s['2013/01/02':'2013/01/05'], s[1:5])
1895+
assert_series_equal(s['2013/02':], s[31:])
1896+
assert_series_equal(s['2014':], s[365:])
1897+
1898+
invalid = ['2013/02/01 9H', '2013/02/01 09:00']
1899+
for v in invalid:
1900+
with tm.assertRaises(IndexError):
1901+
idx[v:]
1902+
1903+
def test_getitem_seconds(self):
1904+
# GH 6716
1905+
didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S', periods=4000)
1906+
pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000)
1907+
1908+
for idx in [didx, pidx]:
1909+
# getitem against index should raise ValueError
1910+
values = ['2014', '2013/02', '2013/01/02',
1911+
'2013/02/01 9H', '2013/02/01 09:00']
1912+
for v in values:
1913+
with tm.assertRaises(ValueError):
1914+
idx[v]
1915+
1916+
s = Series(np.random.rand(len(idx)), index=idx)
1917+
1918+
assert_series_equal(s['2013/01/01 10:00'], s[3600:3660])
1919+
assert_series_equal(s['2013/01/01 9H'], s[:3600])
1920+
for d in ['2013/01/01', '2013/01', '2013']:
1921+
assert_series_equal(s[d], s)
1922+
1923+
def test_range_slice_seconds(self):
1924+
# GH 6716
1925+
didx = DatetimeIndex(start='2013/01/01 09:00:00', freq='S', periods=4000)
1926+
pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000)
1927+
1928+
for idx in [didx, pidx]:
1929+
# slices against index should raise IndexError
1930+
values = ['2014', '2013/02', '2013/01/02',
1931+
'2013/02/01 9H', '2013/02/01 09:00']
1932+
for v in values:
1933+
with tm.assertRaises(IndexError):
1934+
idx[v:]
1935+
1936+
s = Series(np.random.rand(len(idx)), index=idx)
1937+
1938+
assert_series_equal(s['2013/01/01 09:05':'2013/01/01 09:10'], s[300:660])
1939+
assert_series_equal(s['2013/01/01 10:00':'2013/01/01 10:05'], s[3600:3960])
1940+
assert_series_equal(s['2013/01/01 10H':], s[3600:])
1941+
assert_series_equal(s[:'2013/01/01 09:30'], s[:1860])
1942+
for d in ['2013/01/01', '2013/01', '2013']:
1943+
assert_series_equal(s[d:], s)
1944+
18541945
def test_pindex_qaccess(self):
18551946
pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q')
18561947
s = Series(np.random.rand(len(pi)), index=pi).cumsum()

0 commit comments

Comments
 (0)