Skip to content

BUG: GH3448 Unordered time series selection was misbehaving when using label slicing #3452

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 25, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -50,16 +50,17 @@ pandas 0.12.0
- Fix to_csv issue when having a large number of rows and ``NaT`` in some
columns (GH3437_)
- ``.loc`` was not raising when passed an integer list (GH3449_)
- Unordered time series selection was misbehaving when using label slicing (GH3448_)

.. _GH3164: https://github.com/pydata/pandas/issues/3164
.. _GH3251: https://github.com/pydata/pandas/issues/3251
.. _GH3379: https://github.com/pydata/pandas/issues/3379
.. _GH3038: https://github.com/pydata/pandas/issues/3038
.. _GH3437: https://github.com/pydata/pandas/issues/3437
.. _GH3448: https://github.com/pydata/pandas/issues/3448
.. _GH3449: https://github.com/pydata/pandas/issues/3449



pandas 0.11.0
=============

Expand Down
8 changes: 7 additions & 1 deletion pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1178,7 +1178,13 @@ def slice_indexer(self, start=None, end=None, step=None):
This function assumes that the data is sorted, so use at your own peril
"""
start_slice, end_slice = self.slice_locs(start, end)
return slice(start_slice, end_slice, step)

# return a slice
if np.isscalar(start_slice) and np.isscalar(end_slice):
return slice(start_slice, end_slice, step)

# loc indexers
return Index(start_slice) & Index(end_slice)

def slice_locs(self, start=None, end=None):
"""
Expand Down
73 changes: 49 additions & 24 deletions pandas/tseries/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,9 @@ def intersection(self, other):
left_chunk = left.values[lslice]
return self._view_like(left_chunk)

def _partial_date_slice(self, reso, parsed):
def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True):

is_monotonic = self.is_monotonic

if reso == 'year':
t1 = Timestamp(datetime(parsed.year, 1, 1), tz=self.tz)
Expand All @@ -1083,20 +1085,20 @@ def _partial_date_slice(self, reso, parsed):
d = tslib.monthrange(parsed.year, qe)[1] # at end of month
t1 = Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz)
t2 = Timestamp(datetime(parsed.year, qe, d), tz=self.tz)
elif reso == 'day' and self._resolution < Resolution.RESO_DAY:
elif (reso == 'day' and (self._resolution < Resolution.RESO_DAY or not is_monotonic)):
st = datetime(parsed.year, parsed.month, parsed.day)
t1 = Timestamp(st, tz=self.tz)
t2 = st + offsets.Day()
t2 = Timestamp(Timestamp(t2, tz=self.tz).value - 1)
elif (reso == 'hour' and
self._resolution < Resolution.RESO_HR):
elif (reso == 'hour' and (
self._resolution < Resolution.RESO_HR or not is_monotonic)):
st = datetime(parsed.year, parsed.month, parsed.day,
hour=parsed.hour)
t1 = Timestamp(st, tz=self.tz)
t2 = Timestamp(Timestamp(st + offsets.Hour(),
tz=self.tz).value - 1)
elif (reso == 'minute' and
self._resolution < Resolution.RESO_MIN):
elif (reso == 'minute' and (
self._resolution < Resolution.RESO_MIN or not is_monotonic)):
st = datetime(parsed.year, parsed.month, parsed.day,
hour=parsed.hour, minute=parsed.minute)
t1 = Timestamp(st, tz=self.tz)
Expand All @@ -1108,15 +1110,18 @@ def _partial_date_slice(self, reso, parsed):

stamps = self.asi8

if self.is_monotonic:
if is_monotonic:

# a monotonic (sorted) series can be sliced
left = stamps.searchsorted(t1.value, side='left')
right = stamps.searchsorted(t2.value, side='right')
left = stamps.searchsorted(t1.value, side='left') if use_lhs else None
right = stamps.searchsorted(t2.value, side='right') if use_rhs else None
return slice(left, right)

lhs_mask = (stamps>=t1.value) if use_lhs else True
rhs_mask = (stamps<=t2.value) if use_rhs else True

# try to find the dates
return ((stamps>=t1.value) & (stamps<=t2.value)).nonzero()[0]
return (lhs_mask & rhs_mask).nonzero()[0]

def _possibly_promote(self, other):
if other.inferred_type == 'date':
Expand Down Expand Up @@ -1182,11 +1187,11 @@ def get_loc(self, key):
except (KeyError, ValueError):
raise KeyError(key)

def _get_string_slice(self, key):
def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
freq = getattr(self, 'freqstr',
getattr(self, 'inferred_freq', None))
_, parsed, reso = parse_time_string(key, freq)
loc = self._partial_date_slice(reso, parsed)
loc = self._partial_date_slice(reso, parsed, use_lhs=use_lhs, use_rhs=use_rhs)
return loc

def slice_indexer(self, start=None, end=None, step=None):
Expand All @@ -1208,20 +1213,40 @@ def slice_locs(self, start=None, end=None):
Index.slice_locs, customized to handle partial ISO-8601 string slicing
"""
if isinstance(start, basestring) or isinstance(end, basestring):
try:
if start:
start_loc = self._get_string_slice(start).start
else:
start_loc = 0

if end:
end_loc = self._get_string_slice(end).stop
else:
end_loc = len(self)
if self.is_monotonic:
try:
if start:
start_loc = self._get_string_slice(start).start
else:
start_loc = 0

if end:
end_loc = self._get_string_slice(end).stop
else:
end_loc = len(self)

return start_loc, end_loc
except KeyError:
pass

return start_loc, end_loc
except KeyError:
pass
else:
# can't use a slice indexer because we are not sorted!
# so create an indexer directly
try:
if start:
start_loc = self._get_string_slice(start,use_rhs=False)
else:
start_loc = np.arange(len(self))

if end:
end_loc = self._get_string_slice(end,use_lhs=False)
else:
end_loc = np.arange(len(self))

return start_loc, end_loc
except KeyError:
pass

if isinstance(start, time) or isinstance(end, time):
raise KeyError('Cannot use slice_locs with time slice keys')
Expand Down
17 changes: 17 additions & 0 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,23 @@ def test_indexing_unordered(self):
result = ts2[t]
self.assertTrue(expected == result)

# GH 3448 (ranges)
def compare(slobj):
result = ts2[slobj].copy()
result = result.sort_index()
expected = ts[slobj]
assert_series_equal(result,expected)

compare(slice('2011-01-01','2011-01-15'))
compare(slice('2010-12-30','2011-01-15'))
compare(slice('2011-01-01','2011-01-16'))

# partial ranges
compare(slice('2011-01-01','2011-01-6'))
compare(slice('2011-01-06','2011-01-8'))
compare(slice('2011-01-06','2011-01-12'))

# single values
result = ts2['2011'].sort_index()
expected = ts['2011']
assert_series_equal(result,expected)
Expand Down