BUG: GH3448 Unordered time series selection was misbehaving when using label slicing

jreback · jreback · commit 088432395650 · 2013-04-25T08:28:46.000-04:00
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -50,16 +50,17 @@ pandas 0.12.0
   - Fix to_csv issue when having a large number of rows and ``NaT`` in some 
     columns (GH3437_)
   - ``.loc`` was not raising when passed an integer list (GH3449_)
+  - Unordered time series selection was misbehaving when using label slicing (GH3448_)
 
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
 .. _GH3251: https://github.com/pydata/pandas/issues/3251
 .. _GH3379: https://github.com/pydata/pandas/issues/3379
 .. _GH3038: https://github.com/pydata/pandas/issues/3038
 .. _GH3437: https://github.com/pydata/pandas/issues/3437
+.. _GH3448: https://github.com/pydata/pandas/issues/3448
 .. _GH3449: https://github.com/pydata/pandas/issues/3449
 
 
-
 pandas 0.11.0
 =============
 
diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -1178,7 +1178,13 @@ def slice_indexer(self, start=None, end=None, step=None):
         This function assumes that the data is sorted, so use at your own peril
         """
         start_slice, end_slice = self.slice_locs(start, end)
-        return slice(start_slice, end_slice, step)
+
+        # both bounds are scalar positions, so a plain slice is enough
+        if np.isscalar(start_slice) and np.isscalar(end_slice):
+            return slice(start_slice, end_slice, step)
+
+        # bounds are arrays of locations: keep those present in both
+        return Index(start_slice) & Index(end_slice)
 
     def slice_locs(self, start=None, end=None):
         """
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
@@ -1069,7 +1069,9 @@ def intersection(self, other):
             left_chunk = left.values[lslice]
             return self._view_like(left_chunk)
 
-    def _partial_date_slice(self, reso, parsed):
+    def _partial_date_slice(self, reso, parsed, use_lhs=True, use_rhs=True):
+        
+        is_monotonic = self.is_monotonic
 
         if reso == 'year':
             t1 = Timestamp(datetime(parsed.year, 1, 1), tz=self.tz)
@@ -1083,20 +1085,20 @@ def _partial_date_slice(self, reso, parsed):
             d = tslib.monthrange(parsed.year, qe)[1]   # at end of month
             t1 = Timestamp(datetime(parsed.year, parsed.month, 1), tz=self.tz)
             t2 = Timestamp(datetime(parsed.year, qe, d), tz=self.tz)
-        elif reso == 'day' and self._resolution < Resolution.RESO_DAY:
+        elif (reso == 'day' and (self._resolution < Resolution.RESO_DAY or not is_monotonic)):
             st = datetime(parsed.year, parsed.month, parsed.day)
             t1 = Timestamp(st, tz=self.tz)
             t2 = st + offsets.Day()
             t2 = Timestamp(Timestamp(t2, tz=self.tz).value - 1)
-        elif (reso == 'hour' and
-              self._resolution < Resolution.RESO_HR):
+        elif (reso == 'hour' and (
+                self._resolution < Resolution.RESO_HR or not is_monotonic)):
             st = datetime(parsed.year, parsed.month, parsed.day,
                           hour=parsed.hour)
             t1 = Timestamp(st, tz=self.tz)
             t2 = Timestamp(Timestamp(st + offsets.Hour(),
                                      tz=self.tz).value - 1)
-        elif (reso == 'minute' and
-              self._resolution < Resolution.RESO_MIN):
+        elif (reso == 'minute' and (
+                self._resolution < Resolution.RESO_MIN or not is_monotonic)):
             st = datetime(parsed.year, parsed.month, parsed.day,
                           hour=parsed.hour, minute=parsed.minute)
             t1 = Timestamp(st, tz=self.tz)
@@ -1108,15 +1110,18 @@ def _partial_date_slice(self, reso, parsed):
 
         stamps = self.asi8
 
-        if self.is_monotonic:
+        if is_monotonic:
 
             # a monotonic (sorted) series can be sliced
-            left = stamps.searchsorted(t1.value, side='left')
-            right = stamps.searchsorted(t2.value, side='right')
+            left = stamps.searchsorted(t1.value, side='left') if use_lhs else None
+            right = stamps.searchsorted(t2.value, side='right') if use_rhs else None
             return slice(left, right)
 
+        lhs_mask = (stamps>=t1.value) if use_lhs else True
+        rhs_mask = (stamps<=t2.value) if use_rhs else True
+
         # try to find a the dates
-        return ((stamps>=t1.value) & (stamps<=t2.value)).nonzero()[0]
+        return (lhs_mask & rhs_mask).nonzero()[0]
 
     def _possibly_promote(self, other):
         if other.inferred_type == 'date':
@@ -1182,11 +1187,11 @@ def get_loc(self, key):
             except (KeyError, ValueError):
                 raise KeyError(key)
 
-    def _get_string_slice(self, key):
+    def _get_string_slice(self, key, use_lhs=True, use_rhs=True):
         freq = getattr(self, 'freqstr',
                        getattr(self, 'inferred_freq', None))
         _, parsed, reso = parse_time_string(key, freq)
-        loc = self._partial_date_slice(reso, parsed)
+        loc = self._partial_date_slice(reso, parsed, use_lhs=use_lhs, use_rhs=use_rhs)
         return loc
 
     def slice_indexer(self, start=None, end=None, step=None):
@@ -1208,20 +1213,40 @@ def slice_locs(self, start=None, end=None):
         Index.slice_locs, customized to handle partial ISO-8601 string slicing
         """
         if isinstance(start, basestring) or isinstance(end, basestring):
-            try:
-                if start:
-                    start_loc = self._get_string_slice(start).start
-                else:
-                    start_loc = 0
 
-                if end:
-                    end_loc = self._get_string_slice(end).stop
-                else:
-                    end_loc = len(self)
+            if self.is_monotonic:
+                try:
+                    if start:
+                        start_loc = self._get_string_slice(start).start
+                    else:
+                        start_loc = 0
+                        
+                    if end:
+                        end_loc = self._get_string_slice(end).stop
+                    else:
+                        end_loc = len(self)
+
+                    return start_loc, end_loc
+                except KeyError:
+                    pass
 
-                return start_loc, end_loc
-            except KeyError:
-                pass
+            else:
+                # can't use a slice indexer because we are not sorted!
+                # so create an indexer directly
+                try:
+                    if start:
+                        start_loc = self._get_string_slice(start,use_rhs=False)
+                    else:
+                        start_loc = np.arange(len(self))
+                        
+                    if end:
+                        end_loc = self._get_string_slice(end,use_lhs=False)
+                    else:
+                        end_loc = np.arange(len(self))
+
+                    return start_loc, end_loc
+                except KeyError:
+                    pass
 
         if isinstance(start, time) or isinstance(end, time):
             raise KeyError('Cannot use slice_locs with time slice keys')
diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
@@ -183,6 +183,23 @@ def test_indexing_unordered(self):
             result = ts2[t]
             self.assertTrue(expected == result)
 
+        # GH 3448 (ranges)
+        def compare(slobj):
+            result = ts2[slobj].copy()
+            result = result.sort_index()
+            expected = ts[slobj]
+            assert_series_equal(result,expected)
+
+        compare(slice('2011-01-01','2011-01-15'))
+        compare(slice('2010-12-30','2011-01-15'))
+        compare(slice('2011-01-01','2011-01-16'))
+
+        # partial ranges
+        compare(slice('2011-01-01','2011-01-6'))
+        compare(slice('2011-01-06','2011-01-8'))
+        compare(slice('2011-01-06','2011-01-12'))
+
+        # single values
         result = ts2['2011'].sort_index()
         expected = ts['2011']
         assert_series_equal(result,expected)