Skip to content

Commit cc86bdd

Browse files
committed
Fix inconsistency in Partial String Index with 'second' resolution
See pandas-dev#14826. Now the following logic applies: - If the timestamp resolution is strictly less precise than the index resolution, the timestamp is treated as a slice, as it can (in theory) correspond to more than one element in the index. For `Series`, `[]` should return a `Series`; for `DataFrame`, a `DataFrame`. - If the timestamp resolution is equal to the index resolution, the timestamp is treated as an attempt to get an exact match. For `Series`, `[]` should return a scalar; for `DataFrame`, it tries to find a column with this key (if any) and will most probably raise `KeyError`. - If the timestamp resolution is strictly more precise than the index resolution and does not resolve to an exact match, `KeyError` has to be raised in both cases. The test suite is updated as well.
1 parent ea51437 commit cc86bdd

File tree

3 files changed

+175
-7
lines changed

3 files changed

+175
-7
lines changed

pandas/tseries/index.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1294,12 +1294,12 @@ def _parsed_string_to_bounds(self, reso, parsed):
12941294
def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True):
12951295
is_monotonic = self.is_monotonic
12961296
if (is_monotonic
1297-
and ((reso in ['day', 'hour', 'minute']
1298-
and self._resolution >= Resolution.get_reso(reso))
1299-
or (reso == 'second'
1300-
and self._resolution > Resolution.RESO_SEC))):
1297+
and reso in ['day', 'hour', 'minute', 'second']
1298+
and self._resolution >= Resolution.get_reso(reso)):
13011299
# These resolution/monotonicity validations came from GH3931,
13021300
# GH3452 and GH2369.
1301+
1302+
# See also GH14826
13031303
raise KeyError
13041304

13051305
if reso == 'microsecond':
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
import nose
2+
import numpy as np
3+
4+
import pandas.util.testing as tm
5+
from pandas import (
6+
Index, Series, DataFrame, isnull, date_range, Timestamp, Period,
7+
DatetimeIndex, Int64Index, to_datetime, bdate_range, Float64Index,
8+
NaT, timedelta_range, Timedelta, _np_version_under1p8, concat)
9+
10+
from pandas.util.testing import (
11+
assert_frame_equal, assert_series_equal, assert_almost_equal,
12+
_skip_if_has_locale, slow)
13+
14+
class TestTimeSeriesPartialSlices(tm.TestCase):
15+
_multiprocess_can_split_ = True
16+
def assert_exact(self, df, ts, value):
17+
element = df['a'][ts]
18+
19+
# Series should return scalar
20+
self.assertIsInstance(element, np.int64)
21+
self.assertEqual(element, value)
22+
23+
# Frame should raise (exact match)
24+
self.assertRaises(KeyError, df.__getitem__, ts)
25+
26+
#TODO: test falling to column selection
27+
28+
def assert_slice(self, df, ts, slice):
29+
# Series should return slice
30+
expected = df['a'][slice]
31+
assert_series_equal(df['a'][ts], expected)
32+
33+
# Frame should return slice as well
34+
expected = df[slice]
35+
assert_frame_equal(df[ts], expected)
36+
37+
def assert_key_error(self, df, ts):
38+
self.assertRaises(KeyError, df['a'].__getitem__, ts)
39+
self.assertRaises(KeyError, df.__getitem__, ts)
40+
41+
def test_partial_slices_day(self):
42+
df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31',
43+
'2012-01-01',
44+
'2012-01-02']),
45+
dtype=np.int64)
46+
47+
self.assertEqual(df.index.resolution, 'day')
48+
49+
# Timestamp with resolution 'day'
50+
self.assert_exact(df, '2011-12-31', 1)
51+
self.assert_exact(df, '2012-01-01', 2)
52+
self.assert_exact(df, '2012-01-02', 3)
53+
54+
# Timestamp with resolution less precise than 'day'
55+
for ts in ['2011', '2011-12']:
56+
self.assert_slice(df, ts, slice(None, 1))
57+
58+
# The same as previous but several elements in the slice
59+
for ts in ['2012', '2012-01']:
60+
self.assert_slice(df, ts, slice(1, None))
61+
62+
# Timestamp with resolution more precise than 'day'
63+
# Compatible with existing key
64+
for ts in ['2012-01-01 00', '2012-01-01 00:00',
65+
'2012-01-01 00:00:00']:
66+
self.assert_exact(df, ts, 2)
67+
68+
# Timestamp with resolution more precise than 'day'
69+
# Not compatible with existing key
70+
for ts in ['2012-01-01 01', '2012-01-01 00:01',
71+
'2012-01-01 00:00:01']:
72+
self.assert_key_error(df, ts)
73+
74+
75+
def test_partial_slice_hour(self):
76+
df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31 23',
77+
'2012-01-01 00',
78+
'2012-01-01 01']),
79+
dtype=np.int64)
80+
81+
self.assertEqual(df.index.resolution, 'hour')
82+
83+
# Timestamp with resolution 'hour'
84+
self.assert_exact(df, '2011-12-31 23', 1)
85+
self.assert_exact(df, '2012-01-01 00', 2)
86+
self.assert_exact(df, '2012-01-01 01', 3)
87+
88+
# Timestamp with resolution less precise than 'hour'
89+
for ts in ['2011', '2011-12', '2011-12-31']:
90+
self.assert_slice(df, ts, slice(None, 1))
91+
92+
# The same as previous but several elements in the slice
93+
for ts in ['2012', '2012-01', '2012-01-01']:
94+
self.assert_slice(df, ts, slice(1, None))
95+
96+
# Timestamp with resolution more precise than 'hour'
97+
# Compatible with existing key
98+
for ts in ['2012-01-01 00:00',
99+
'2012-01-01 00:00:00']:
100+
self.assert_exact(df, ts, 2)
101+
102+
# Timestamp with resolution more precise than 'hour'
103+
# Not compatible with existing key
104+
for ts in ['2012-01-01 00:01',
105+
'2012-01-01 00:00:01']:
106+
self.assert_key_error(df, ts)
107+
108+
def test_partial_slice_minute(self):
109+
df = DataFrame({'a': [1, 2, 3]},
110+
DatetimeIndex(['2011-12-31 23:59',
111+
'2012-01-01 00:00',
112+
'2012-01-01 00:01']),
113+
dtype=np.int64)
114+
115+
self.assertEqual(df.index.resolution, 'minute')
116+
117+
# Timestamp with resolution 'minute'
118+
self.assert_exact(df, '2011-12-31 23:59', 1)
119+
self.assert_exact(df, '2012-01-01 00:00', 2)
120+
self.assert_exact(df, '2012-01-01 00:01', 3)
121+
122+
# Timestamp with resolution less precise than 'minute'
123+
for ts in ['2011', '2011-12', '2011-12-31',
124+
'2011-12-31 23']:
125+
self.assert_slice(df, ts, slice(None, 1))
126+
127+
# The same as previous but several elements in the slice
128+
for ts in ['2012', '2012-01', '2012-01-01',
129+
'2012-01-01 00']:
130+
self.assert_slice(df, ts, slice(1, None))
131+
132+
# Timestamp with resolution more precise than 'minute'
133+
# Compatible with existing key
134+
for ts in ['2012-01-01 00:00:00']:
135+
self.assert_exact(df, ts, 2)
136+
137+
# Timestamp with resolution more precise than 'minute'
138+
# Not compatible with existing key
139+
for ts in ['2012-01-01 00:00:01']:
140+
self.assert_key_error(df, ts)
141+
142+
def test_partial_slice_second(self):
143+
# See GH14826
144+
df = DataFrame({'a': [1, 2, 3]},
145+
DatetimeIndex(['2011-12-31 23:59:59',
146+
'2012-01-01 00:00:00',
147+
'2012-01-01 00:00:01']),
148+
dtype=np.int64)
149+
150+
self.assertEqual(df.index.resolution, 'second')
151+
152+
# Timestamp with resolution 'second'
153+
self.assert_exact(df, '2011-12-31 23:59:59', 1)
154+
self.assert_exact(df, '2012-01-01 00:00:00', 2)
155+
self.assert_exact(df, '2012-01-01 00:00:01', 3)
156+
157+
# Timestamp with resolution less precise than 'second'
158+
for ts in ['2011', '2011-12', '2011-12-31',
159+
'2011-12-31 23', '2011-12-31 23:59']:
160+
self.assert_slice(df, ts, slice(None, 1))
161+
162+
# The same as previous but several elements in the slice
163+
for ts in ['2012', '2012-01', '2012-01-01',
164+
'2012-01-01 00', '2012-01-01 00:00']:
165+
self.assert_slice(df, ts, slice(1, None))
166+
167+
# Not possible to create a string that represents timestamp
168+
# that is more exact than 'second'

pandas/tseries/tests/test_timeseries.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -266,18 +266,18 @@ def test_indexing(self):
266266
expected = ts['2013']
267267
assert_series_equal(expected, ts)
268268

269-
# GH 3925, indexing with a seconds resolution string / datetime object
269+
# GH14826, indexing with a seconds resolution string / datetime object
270270
df = DataFrame(randn(5, 5),
271271
columns=['open', 'high', 'low', 'close', 'volume'],
272272
index=date_range('2012-01-02 18:01:00',
273273
periods=5, tz='US/Central', freq='s'))
274274
expected = df.loc[[df.index[2]]]
275-
result = df['2012-01-02 18:01:02']
276-
assert_frame_equal(result, expected)
277275

278276
# this is a single date, so will raise
277+
self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02', )
279278
self.assertRaises(KeyError, df.__getitem__, df.index[2], )
280279

280+
281281
def test_recreate_from_data(self):
282282
freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N',
283283
'C']

0 commit comments

Comments
 (0)