Skip to content

Fix inconsistency in Partial String Index with 'second' resolution #14856

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1293,14 +1293,12 @@ def _parsed_string_to_bounds(self, reso, parsed):

def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True):
is_monotonic = self.is_monotonic
if ((reso in ['day', 'hour', 'minute'] and
not (self._resolution < Resolution.get_reso(reso) or
not is_monotonic)) or
(reso == 'second' and
not (self._resolution <= Resolution.RESO_SEC or
not is_monotonic))):
if (is_monotonic and reso in ['day', 'hour', 'minute', 'second'] and
self._resolution >= Resolution.get_reso(reso)):
# These resolution/monotonicity validations came from GH3931,
# GH3452 and GH2369.

# See also GH14826
raise KeyError

if reso == 'microsecond':
Expand Down
228 changes: 225 additions & 3 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,16 +266,15 @@ def test_indexing(self):
expected = ts['2013']
assert_series_equal(expected, ts)

# GH 3925, indexing with a seconds resolution string / datetime object
# GH14826, indexing with a seconds resolution string / datetime object
df = DataFrame(randn(5, 5),
columns=['open', 'high', 'low', 'close', 'volume'],
index=date_range('2012-01-02 18:01:00',
periods=5, tz='US/Central', freq='s'))
expected = df.loc[[df.index[2]]]
result = df['2012-01-02 18:01:02']
assert_frame_equal(result, expected)

# this is a single date, so will raise
self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02', )
self.assertRaises(KeyError, df.__getitem__, df.index[2], )

def test_recreate_from_data(self):
Expand Down Expand Up @@ -4897,6 +4896,64 @@ def test_partial_slice_daily(self):

self.assertRaises(Exception, s.__getitem__, '2004-12-31 00')

# GH14856
# DatetimeIndex without explicit freq
df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31',
'2012-01-01',
'2012-01-02']),
dtype=np.int64)

self.assertEqual(df.index.resolution, 'day')

# Timestamp with resolution 'day'
# Should be exact match for series and raise KeyError for Frame
for ts, expected in (('2011-12-31', 1), ('2012-01-01', 2),
('2012-01-02', 3)):
result = df['a'][ts]
self.assertIsInstance(result, np.int64)
self.assertEqual(result, expected)
self.assertRaises(KeyError, df.__getitem__, ts)

# Timestamp with resolution less precise than 'day'
for ts in ('2011', '2011-12'):
# Series should return slice
result = df['a'][ts]
expected = df['a'][:1]
assert_series_equal(result, expected)

# Frame should return slice as well
result = df[ts]
expected = df[:1]
assert_frame_equal(result, expected)

# The same as previous but several elements in the slice
for ts in ('2012', '2012-01'):
# Series should return slice
result = df['a'][ts]
expected = df['a'][1:]
assert_series_equal(result, expected)

# Frame should return slice as well
result = df[ts]
expected = df[1:]
assert_frame_equal(result, expected)

# Timestamp with resolution more precise than 'day'
# Compatible with existing key
for ts in ('2012-01-01 00', '2012-01-01 00:00',
'2012-01-01 00:00:00'):
result = df['a'][ts]
self.assertIsInstance(result, np.int64)
self.assertEqual(result, 2)
self.assertRaises(KeyError, df.__getitem__, ts)

# Timestamp with resolution more precise than 'day'
# Not compatible with existing key
for ts in ('2012-01-01 01', '2012-01-01 00:01',
'2012-01-01 00:00:01'):
self.assertRaises(KeyError, df['a'].__getitem__, ts)
self.assertRaises(KeyError, df.__getitem__, ts)

def test_partial_slice_hourly(self):
rng = DatetimeIndex(freq='T', start=datetime(2005, 1, 1, 20, 0, 0),
periods=500)
Expand All @@ -4911,6 +4968,63 @@ def test_partial_slice_hourly(self):
self.assertEqual(s['2005-1-1 20:00'], s.ix[0])
self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:15')

# GH14856
# DatetimeIndex without explicit freq
df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31 23',
'2012-01-01 00',
'2012-01-01 01']),
dtype=np.int64)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you are repeating lots of tests with only a slight variation

can you do it with a loop instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. Actually, it was the reason why I wrote those helper functions initially.

Okay, I merged the tests for all resolutions into one loop. IMO it became less readable, but it reflects the logic properly.

self.assertEqual(df.index.resolution, 'hour')

# Timestamp with resolution 'hour'
# Should be exact match for series and raise KeyError for Frame
for ts, expected in (('2011-12-31 23', 1),
('2012-01-01 00', 2),
('2012-01-01 01', 3)):
result = df['a'][ts]
self.assertIsInstance(result, np.int64)
self.assertEqual(result, expected)
self.assertRaises(KeyError, df.__getitem__, ts)

# Timestamp with resolution less precise than 'hour'
for ts in ('2011', '2011-12', '2011-12-31'):
# Series should return slice
result = df['a'][ts]
expected = df['a'][:1]
assert_series_equal(result, expected)

# Frame should return slice as well
result = df[ts]
expected = df[:1]
assert_frame_equal(result, expected)

# The same as previous but several elements in the slice
for ts in ('2012', '2012-01', '2012-01-01'):
# Series should return slice
result = df['a'][ts]
expected = df['a'][1:]
assert_series_equal(result, expected)

# Frame should return slice as well
result = df[ts]
expected = df[1:]
assert_frame_equal(result, expected)

# Timestamp with resolution more precise than 'hour'
# Compatible with existing key
for ts in ('2012-01-01 00:00', '2012-01-01 00:00:00'):
result = df['a'][ts]
self.assertIsInstance(result, np.int64)
self.assertEqual(result, 2)
self.assertRaises(KeyError, df.__getitem__, ts)

# Timestamp with resolution more precise than 'hour'
# Not compatible with existing key
for ts in ('2012-01-01 00:01', '2012-01-01 00:00:01'):
self.assertRaises(KeyError, df['a'].__getitem__, ts)
self.assertRaises(KeyError, df.__getitem__, ts)

def test_partial_slice_minutely(self):
rng = DatetimeIndex(freq='S', start=datetime(2005, 1, 1, 23, 59, 0),
periods=500)
Expand All @@ -4925,6 +5039,64 @@ def test_partial_slice_minutely(self):
self.assertEqual(s[Timestamp('2005-1-1 23:59:00')], s.ix[0])
self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00')

# GH14856
# DatetimeIndex without explicit freq
df = DataFrame({'a': [1, 2, 3]},
DatetimeIndex(['2011-12-31 23:59',
'2012-01-01 00:00',
'2012-01-01 00:01']),
dtype=np.int64)

self.assertEqual(df.index.resolution, 'minute')

# Timestamp with resolution 'minute'
# Should be exact match for series and raise KeyError for Frame
for ts, expected in (('2011-12-31 23:59', 1),
('2012-01-01 00:00', 2),
('2012-01-01 00:01', 3)):
result = df['a'][ts]
self.assertIsInstance(result, np.int64)
self.assertEqual(result, expected)
self.assertRaises(KeyError, df.__getitem__, ts)

# Timestamp with resolution less precise than 'minute'
for ts in ('2011', '2011-12', '2011-12-31', '2011-12-31 23'):
# Series should return slice
result = df['a'][ts]
expected = df['a'][:1]
assert_series_equal(result, expected)

# Frame should return slice as well
result = df[ts]
expected = df[:1]
assert_frame_equal(result, expected)

# The same as previous but several elements in the slice
for ts in ('2012', '2012-01', '2012-01-01', '2012-01-01 00'):
# Series should return slice
result = df['a'][ts]
expected = df['a'][1:]
assert_series_equal(result, expected)

# Frame should return slice as well
result = df[ts]
expected = df[1:]
assert_frame_equal(result, expected)

# Timestamp with resolution more precise than 'minute'
# Compatible with existing key
ts = '2012-01-01 00:00:00'
result = df['a'][ts]
self.assertIsInstance(result, np.int64)
self.assertEqual(result, 2)
self.assertRaises(KeyError, df.__getitem__, ts)

# Timestamp with resolution more precise than 'minute'
# Not compatible with existing key
ts = '2012-01-01 00:00:01'
self.assertRaises(KeyError, df['a'].__getitem__, ts)
self.assertRaises(KeyError, df.__getitem__, ts)

def test_partial_slice_second_precision(self):
rng = DatetimeIndex(start=datetime(2005, 1, 1, 0, 0, 59,
microsecond=999990),
Expand All @@ -4941,6 +5113,56 @@ def test_partial_slice_second_precision(self):
self.assertRaisesRegexp(KeyError, '2005-1-1 00:00:00',
lambda: s['2005-1-1 00:00:00'])

# GH14856
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you give 1-2 lines about what you are asserting here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. 67e6bab

# DatetimeIndex without explicit freq
# Without microseconds
df = DataFrame({'a': [1, 2, 3]},
DatetimeIndex(['2011-12-31 23:59:59',
'2012-01-01 00:00:00',
'2012-01-01 00:00:01']),
dtype=np.int64)

self.assertEqual(df.index.resolution, 'second')

# Timestamp with resolution 'second'
# Should be exact match for series and raise KeyError for Frame
for ts, expected in (('2011-12-31 23:59:59', 1),
('2012-01-01 00:00:00', 2),
('2012-01-01 00:00:01', 3)):
result = df['a'][ts]
self.assertIsInstance(result, np.int64)
self.assertEqual(result, expected)
self.assertRaises(KeyError, df.__getitem__, ts)

# Timestamp with resolution less precise than 'second'
for ts in ('2011', '2011-12', '2011-12-31', '2011-12-31 23',
'2011-12-31 23:59'):
# Series should return slice
result = df['a'][ts]
expected = df['a'][:1]
assert_series_equal(result, expected)

# Frame should return slice as well
result = df[ts]
expected = df[:1]
assert_frame_equal(result, expected)

# The same as previous but several elements in the slice
for ts in ('2012', '2012-01', '2012-01-01', '2012-01-01 00',
'2012-01-01 00:00'):
# Series should return slice
result = df['a'][ts]
expected = df['a'][1:]
assert_series_equal(result, expected)

# Frame should return slice as well
result = df[ts]
expected = df[1:]
assert_frame_equal(result, expected)

# Not possible to create a string that represents a timestamp
# that is more exact than 'second'

def test_partial_slicing_with_multiindex(self):

# GH 4758
Expand Down