Skip to content

Commit 0884323

Browse files
committed
BUG: GH3448 Unordered time series selection was misbehaving when using label slicing
1 parent 4aea3f2 commit 0884323

File tree

4 files changed

+75
-26
lines changed

4 files changed

+75
-26
lines changed

RELEASE.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,17 @@ pandas 0.12.0
5050
- Fix to_csv issue when having a large number of rows and ``NaT`` in some
5151
columns (GH3437_)
5252
- ``.loc`` was not raising when passed an integer list (GH3449_)
53+
- Unordered time series selection was misbehaving when using label slicing (GH3448_)
5354

5455
.. _GH3164: https://github.com/pydata/pandas/issues/3164
5556
.. _GH3251: https://github.com/pydata/pandas/issues/3251
5657
.. _GH3379: https://github.com/pydata/pandas/issues/3379
5758
.. _GH3038: https://github.com/pydata/pandas/issues/3038
5859
.. _GH3437: https://github.com/pydata/pandas/issues/3437
60+
.. _GH3448: https://github.com/pydata/pandas/issues/3448
5961
.. _GH3449: https://github.com/pydata/pandas/issues/3449
6062

6163

62-
6364
pandas 0.11.0
6465
=============
6566

pandas/core/index.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -1178,7 +1178,13 @@ def slice_indexer(self, start=None, end=None, step=None):
11781178
This function assumes that the data is sorted, so use at your own peril
11791179
"""
11801180
start_slice, end_slice = self.slice_locs(start, end)
1181-
return slice(start_slice, end_slice, step)
1181+
1182+
# return a slice
1183+
if np.isscalar(start_slice) and np.isscalar(end_slice):
1184+
return slice(start_slice, end_slice, step)
1185+
1186+
# loc indexers
1187+
return Index(start_slice) & Index(end_slice)
11821188

11831189
def slice_locs(self, start=None, end=None):
11841190
"""

pandas/tseries/index.py

+49-24
Original file line numberDiff line numberDiff line change
@@ -1069,7 +1069,9 @@ def intersection(self, other):
10691069
left_chunk = left.values[lslice]
10701070
return self._view_like(left_chunk)
10711071

1072-
def _partial_date_slice(self, reso, parsed):
1072+
def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True):
1073+
1074+
is_monotonic = self.is_monotonic
10731075

10741076
if reso == 'year':
10751077
t1 = Timestamp(datetime(parsed.year, 1, 1), tz=self.tz)
@@ -1083,20 +1085,20 @@ def _partial_date_slice(self, reso, parsed):
10831085
d = tslib.monthrange(parsed.year, qe)[1] # at end of month
10841086
t1 = Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz)
10851087
t2 = Timestamp(datetime(parsed.year, qe, d), tz=self.tz)
1086-
elif reso == 'day' and self._resolution < Resolution.RESO_DAY:
1088+
elif (reso == 'day' and (self._resolution < Resolution.RESO_DAY or not is_monotonic)):
10871089
st = datetime(parsed.year, parsed.month, parsed.day)
10881090
t1 = Timestamp(st, tz=self.tz)
10891091
t2 = st + offsets.Day()
10901092
t2 = Timestamp(Timestamp(t2, tz=self.tz).value - 1)
1091-
elif (reso == 'hour' and
1092-
self._resolution < Resolution.RESO_HR):
1093+
elif (reso == 'hour' and (
1094+
self._resolution < Resolution.RESO_HR or not is_monotonic)):
10931095
st = datetime(parsed.year, parsed.month, parsed.day,
10941096
hour=parsed.hour)
10951097
t1 = Timestamp(st, tz=self.tz)
10961098
t2 = Timestamp(Timestamp(st + offsets.Hour(),
10971099
tz=self.tz).value - 1)
1098-
elif (reso == 'minute' and
1099-
self._resolution < Resolution.RESO_MIN):
1100+
elif (reso == 'minute' and (
1101+
self._resolution < Resolution.RESO_MIN or not is_monotonic)):
11001102
st = datetime(parsed.year, parsed.month, parsed.day,
11011103
hour=parsed.hour, minute=parsed.minute)
11021104
t1 = Timestamp(st, tz=self.tz)
@@ -1108,15 +1110,18 @@ def _partial_date_slice(self, reso, parsed):
11081110

11091111
stamps = self.asi8
11101112

1111-
if self.is_monotonic:
1113+
if is_monotonic:
11121114

11131115
# a monotonic (sorted) series can be sliced
1114-
left = stamps.searchsorted(t1.value, side='left')
1115-
right = stamps.searchsorted(t2.value, side='right')
1116+
left = stamps.searchsorted(t1.value, side='left') if use_lhs else None
1117+
right = stamps.searchsorted(t2.value, side='right') if use_rhs else None
11161118
return slice(left, right)
11171119

1120+
lhs_mask = (stamps>=t1.value) if use_lhs else True
1121+
rhs_mask = (stamps<=t2.value) if use_rhs else True
1122+
11181123
# try to find the dates
1119-
return ((stamps>=t1.value) & (stamps<=t2.value)).nonzero()[0]
1124+
return (lhs_mask & rhs_mask).nonzero()[0]
11201125

11211126
def _possibly_promote(self, other):
11221127
if other.inferred_type == 'date':
@@ -1182,11 +1187,11 @@ def get_loc(self, key):
11821187
except (KeyError, ValueError):
11831188
raise KeyError(key)
11841189

1185-
def _get_string_slice(self, key):
1190+
def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
11861191
freq = getattr(self, 'freqstr',
11871192
getattr(self, 'inferred_freq', None))
11881193
_, parsed, reso = parse_time_string(key, freq)
1189-
loc = self._partial_date_slice(reso, parsed)
1194+
loc = self._partial_date_slice(reso, parsed, use_lhs=use_lhs, use_rhs=use_rhs)
11901195
return loc
11911196

11921197
def slice_indexer(self, start=None, end=None, step=None):
@@ -1208,20 +1213,40 @@ def slice_locs(self, start=None, end=None):
12081213
Index.slice_locs, customized to handle partial ISO-8601 string slicing
12091214
"""
12101215
if isinstance(start, basestring) or isinstance(end, basestring):
1211-
try:
1212-
if start:
1213-
start_loc = self._get_string_slice(start).start
1214-
else:
1215-
start_loc = 0
12161216

1217-
if end:
1218-
end_loc = self._get_string_slice(end).stop
1219-
else:
1220-
end_loc = len(self)
1217+
if self.is_monotonic:
1218+
try:
1219+
if start:
1220+
start_loc = self._get_string_slice(start).start
1221+
else:
1222+
start_loc = 0
1223+
1224+
if end:
1225+
end_loc = self._get_string_slice(end).stop
1226+
else:
1227+
end_loc = len(self)
1228+
1229+
return start_loc, end_loc
1230+
except KeyError:
1231+
pass
12211232

1222-
return start_loc, end_loc
1223-
except KeyError:
1224-
pass
1233+
else:
1234+
# can't use a slice indexer because we are not sorted!
1235+
# so create an indexer directly
1236+
try:
1237+
if start:
1238+
start_loc = self._get_string_slice(start,use_rhs=False)
1239+
else:
1240+
start_loc = np.arange(len(self))
1241+
1242+
if end:
1243+
end_loc = self._get_string_slice(end,use_lhs=False)
1244+
else:
1245+
end_loc = np.arange(len(self))
1246+
1247+
return start_loc, end_loc
1248+
except KeyError:
1249+
pass
12251250

12261251
if isinstance(start, time) or isinstance(end, time):
12271252
raise KeyError('Cannot use slice_locs with time slice keys')

pandas/tseries/tests/test_timeseries.py

+17
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,23 @@ def test_indexing_unordered(self):
183183
result = ts2[t]
184184
self.assertTrue(expected == result)
185185

186+
# GH 3448 (ranges)
187+
def compare(slobj):
188+
result = ts2[slobj].copy()
189+
result = result.sort_index()
190+
expected = ts[slobj]
191+
assert_series_equal(result,expected)
192+
193+
compare(slice('2011-01-01','2011-01-15'))
194+
compare(slice('2010-12-30','2011-01-15'))
195+
compare(slice('2011-01-01','2011-01-16'))
196+
197+
# partial ranges
198+
compare(slice('2011-01-01','2011-01-6'))
199+
compare(slice('2011-01-06','2011-01-8'))
200+
compare(slice('2011-01-06','2011-01-12'))
201+
202+
# single values
186203
result = ts2['2011'].sort_index()
187204
expected = ts['2011']
188205
assert_series_equal(result,expected)

0 commit comments

Comments
 (0)