Skip to content

Fix inconsistency in Partial String Index with 'second' resolution #14856

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1293,14 +1293,13 @@ def _parsed_string_to_bounds(self, reso, parsed):

def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True):
is_monotonic = self.is_monotonic
if ((reso in ['day', 'hour', 'minute'] and
not (self._resolution < Resolution.get_reso(reso) or
not is_monotonic)) or
(reso == 'second' and
not (self._resolution <= Resolution.RESO_SEC or
not is_monotonic))):
if (is_monotonic
and reso in ['day', 'hour', 'minute', 'second']
and self._resolution >= Resolution.get_reso(reso)):
# These resolution/monotonicity validations came from GH3931,
# GH3452 and GH2369.

# See also GH14826
raise KeyError

if reso == 'microsecond':
Expand Down
163 changes: 163 additions & 0 deletions pandas/tseries/tests/test_partial_string_indexes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import numpy as np

import pandas.util.testing as tm
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of creating a new file, these tests should go with the existing tests in test_timeseries.py.

Further, don't create new ways of testing; just follow the existing ones.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks. I'll move it to test_timeseries.py. Should I keep it in a separate class or merge it into one of the existing ones? What do you mean by "new ways of testing"? Is it okay to add helper functions like assert_exact, as I did?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put it with the existing tests.
Please follow the existing testing methodologies.

In other words, don't create your own helpers; it's not standard.

from pandas import DataFrame, DatetimeIndex

from pandas.util.testing import assert_frame_equal, assert_series_equal


class TestTimeSeriesPartialSlices(tm.TestCase):
    """Partial string indexing checks for a DatetimeIndex-backed frame.

    Each test builds a three-row frame with a single int64 column ``'a'``
    holding 1, 2, 3 and indexes it with date strings that are less precise
    than, as precise as, or more precise than the index resolution.
    """
    _multiprocess_can_split_ = True

    def assert_exact(self, df, ts, value):
        """Assert that ``ts`` behaves as an exact key.

        Selecting ``ts`` from column ``'a'`` must give the ``np.int64``
        scalar ``value``, while selecting ``ts`` from the frame itself must
        raise ``KeyError``.
        """
        element = df['a'][ts]

        # The Series lookup is expected to yield a scalar, not a slice.
        self.assertIsInstance(element, np.int64)
        self.assertEqual(element, value)

        # The frame lookup is expected to raise on an exact match.
        with self.assertRaises(KeyError):
            df[ts]

        # TODO: test falling to column selection

    def assert_slice(self, df, ts, the_slice):
        """Assert that ``ts`` selects the same rows as ``the_slice``.

        The comparison is made for column ``'a'`` first and then for the
        whole frame.
        """
        # Series: the string key must be equivalent to the positional slice.
        expected_series = df['a'][the_slice]
        assert_series_equal(df['a'][ts], expected_series)

        # Frame: same expectation as for the Series.
        expected_frame = df[the_slice]
        assert_frame_equal(df[ts], expected_frame)

    def assert_key_error(self, df, ts):
        """Assert that ``ts`` raises ``KeyError`` on both Series and frame."""
        with self.assertRaises(KeyError):
            df['a'][ts]
        with self.assertRaises(KeyError):
            df[ts]

    def test_partial_slices_day(self):
        # Three consecutive dates; the index must report 'day' resolution.
        stamps = DatetimeIndex(['2011-12-31', '2012-01-01', '2012-01-02'])
        frame = DataFrame({'a': [1, 2, 3]}, index=stamps, dtype=np.int64)

        self.assertEqual(frame.index.resolution, 'day')

        # Keys with resolution 'day' are exact matches.
        exact_keys = ('2011-12-31', '2012-01-01', '2012-01-02')
        for value, ts in enumerate(exact_keys, 1):
            self.assert_exact(frame, ts, value)

        # Keys less precise than 'day' that cover only the first row.
        for ts in ('2011', '2011-12'):
            self.assert_slice(frame, ts, slice(None, 1))

        # Keys less precise than 'day' that cover the last two rows.
        for ts in ('2012', '2012-01'):
            self.assert_slice(frame, ts, slice(1, None))

        # Keys more precise than 'day' that coincide with an existing key.
        for ts in ('2012-01-01 00', '2012-01-01 00:00',
                   '2012-01-01 00:00:00'):
            self.assert_exact(frame, ts, 2)

        # Keys more precise than 'day' that match no existing key.
        for ts in ('2012-01-01 01', '2012-01-01 00:01',
                   '2012-01-01 00:00:01'):
            self.assert_key_error(frame, ts)

    def test_partial_slice_hour(self):
        # Three consecutive hours; the index must report 'hour' resolution.
        stamps = DatetimeIndex(['2011-12-31 23',
                                '2012-01-01 00',
                                '2012-01-01 01'])
        frame = DataFrame({'a': [1, 2, 3]}, index=stamps, dtype=np.int64)

        self.assertEqual(frame.index.resolution, 'hour')

        # Keys with resolution 'hour' are exact matches.
        exact_keys = ('2011-12-31 23', '2012-01-01 00', '2012-01-01 01')
        for value, ts in enumerate(exact_keys, 1):
            self.assert_exact(frame, ts, value)

        # Keys less precise than 'hour' that cover only the first row.
        for ts in ('2011', '2011-12', '2011-12-31'):
            self.assert_slice(frame, ts, slice(None, 1))

        # Keys less precise than 'hour' that cover the last two rows.
        for ts in ('2012', '2012-01', '2012-01-01'):
            self.assert_slice(frame, ts, slice(1, None))

        # Keys more precise than 'hour' that coincide with an existing key.
        for ts in ('2012-01-01 00:00', '2012-01-01 00:00:00'):
            self.assert_exact(frame, ts, 2)

        # Keys more precise than 'hour' that match no existing key.
        for ts in ('2012-01-01 00:01', '2012-01-01 00:00:01'):
            self.assert_key_error(frame, ts)

    def test_partial_slice_minute(self):
        # Three consecutive minutes; the index must report 'minute'
        # resolution.
        stamps = DatetimeIndex(['2011-12-31 23:59',
                                '2012-01-01 00:00',
                                '2012-01-01 00:01'])
        frame = DataFrame({'a': [1, 2, 3]}, index=stamps, dtype=np.int64)

        self.assertEqual(frame.index.resolution, 'minute')

        # Keys with resolution 'minute' are exact matches.
        exact_keys = ('2011-12-31 23:59',
                      '2012-01-01 00:00',
                      '2012-01-01 00:01')
        for value, ts in enumerate(exact_keys, 1):
            self.assert_exact(frame, ts, value)

        # Keys less precise than 'minute' that cover only the first row.
        for ts in ('2011', '2011-12', '2011-12-31', '2011-12-31 23'):
            self.assert_slice(frame, ts, slice(None, 1))

        # Keys less precise than 'minute' that cover the last two rows.
        for ts in ('2012', '2012-01', '2012-01-01', '2012-01-01 00'):
            self.assert_slice(frame, ts, slice(1, None))

        # A key more precise than 'minute' that coincides with an existing
        # key.
        self.assert_exact(frame, '2012-01-01 00:00:00', 2)

        # A key more precise than 'minute' that matches no existing key.
        self.assert_key_error(frame, '2012-01-01 00:00:01')

    def test_partial_slice_second(self):
        # See GH14826
        # Three consecutive seconds; the index must report 'second'
        # resolution.
        stamps = DatetimeIndex(['2011-12-31 23:59:59',
                                '2012-01-01 00:00:00',
                                '2012-01-01 00:00:01'])
        frame = DataFrame({'a': [1, 2, 3]}, index=stamps, dtype=np.int64)

        self.assertEqual(frame.index.resolution, 'second')

        # Keys with resolution 'second' are exact matches.
        exact_keys = ('2011-12-31 23:59:59',
                      '2012-01-01 00:00:00',
                      '2012-01-01 00:00:01')
        for value, ts in enumerate(exact_keys, 1):
            self.assert_exact(frame, ts, value)

        # Keys less precise than 'second' that cover only the first row.
        for ts in ('2011', '2011-12', '2011-12-31',
                   '2011-12-31 23', '2011-12-31 23:59'):
            self.assert_slice(frame, ts, slice(None, 1))

        # Keys less precise than 'second' that cover the last two rows.
        for ts in ('2012', '2012-01', '2012-01-01',
                   '2012-01-01 00', '2012-01-01 00:00'):
            self.assert_slice(frame, ts, slice(1, None))

        # Not possible to create a string that represents a timestamp
        # that is more exact than 'second'
6 changes: 3 additions & 3 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,18 +266,18 @@ def test_indexing(self):
expected = ts['2013']
assert_series_equal(expected, ts)

# GH 3925, indexing with a seconds resolution string / datetime object
# GH14826, indexing with a seconds resolution string / datetime object
df = DataFrame(randn(5, 5),
columns=['open', 'high', 'low', 'close', 'volume'],
index=date_range('2012-01-02 18:01:00',
periods=5, tz='US/Central', freq='s'))
expected = df.loc[[df.index[2]]]
result = df['2012-01-02 18:01:02']
assert_frame_equal(result, expected)

# this is a single date, so will raise
self.assertRaises(KeyError, df.__getitem__, '2012-01-02 18:01:02', )
self.assertRaises(KeyError, df.__getitem__, df.index[2], )


def test_recreate_from_data(self):
freqs = ['M', 'Q', 'A', 'D', 'B', 'BH', 'T', 'S', 'L', 'U', 'H', 'N',
'C']
Expand Down