From 9bf8463bda958f8fa1748f002da5b9e07676f883 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 21 Mar 2013 20:49:46 -0400 Subject: [PATCH 1/3] ENH: added selection to an unordered timeseries the same semantics as an ordered timeseries (GH2437) --- RELEASE.rst | 4 +++ doc/source/v0.11.0.txt | 3 +++ pandas/tseries/frequencies.py | 10 ++++---- pandas/tseries/index.py | 17 ++++++++----- pandas/tseries/tests/test_timeseries.py | 34 ++++++++++++++++++++----- 5 files changed, 50 insertions(+), 18 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index c1fa30e23bc5a..38b7eef21d8b8 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -202,6 +202,8 @@ pandas 0.11.0 - Fixed bug in Timestamp(d,tz=foo) when d is date() rather then datetime() (GH2993_) - series.plot(kind='bar') now respects pylab color schem (GH3115_) - Fixed bug in reshape if not passed correct input, now raises TypeError (GH2719_) + - Allow selection in an *unordered* timeseries to work similarly + to an *ordered* timeseries (GH2437_). Fix NameError issue on RESO_US (GH2787_) .. _GH2758: https://github.com/pydata/pandas/issues/2758 .. _GH2809: https://github.com/pydata/pandas/issues/2809 @@ -227,6 +229,8 @@ pandas 0.11.0 .. _GH2751: https://github.com/pydata/pandas/issues/2751 .. _GH2776: https://github.com/pydata/pandas/issues/2776 .. _GH2778: https://github.com/pydata/pandas/issues/2778 +.. _GH2437: https://github.com/pydata/pandas/issues/2437 +.. _GH2787: https://github.com/pydata/pandas/issues/2787 .. _GH2793: https://github.com/pydata/pandas/issues/2793 .. _GH2795: https://github.com/pydata/pandas/issues/2795 .. 
_GH2819: https://github.com/pydata/pandas/issues/2819 diff --git a/doc/source/v0.11.0.txt b/doc/source/v0.11.0.txt index eba37c02c6237..f13fb50f1aa3c 100644 --- a/doc/source/v0.11.0.txt +++ b/doc/source/v0.11.0.txt @@ -243,6 +243,8 @@ Enhancements - In ``HDFStore``, new keywords ``iterator=boolean``, and ``chunksize=number_in_a_chunk`` are provided to support iteration on ``select`` and ``select_as_multiple`` (GH3076_) + - You can now select timestamps from an *unordered* timeseries similarly to an *ordered* timeseries (GH2437_) + - ``Squeeze`` to possibly remove length 1 dimensions from an object. .. ipython:: python @@ -293,6 +295,7 @@ See the `full release notes `__ or issue tracker on GitHub for a complete list. +.. _GH2437: https://github.com/pydata/pandas/issues/2437 .. _GH2809: https://github.com/pydata/pandas/issues/2809 .. _GH2810: https://github.com/pydata/pandas/issues/2810 .. _GH2837: https://github.com/pydata/pandas/issues/2837 diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 3bf29af8581a9..a43c80bf22158 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -34,11 +34,11 @@ class Resolution(object): @classmethod def get_str(cls, reso): - return {RESO_US: 'microsecond', - RESO_SEC: 'second', - RESO_MIN: 'minute', - RESO_HR: 'hour', - RESO_DAY: 'day'}.get(reso, 'day') + return {cls.RESO_US: 'microsecond', + cls.RESO_SEC: 'second', + cls.RESO_MIN: 'minute', + cls.RESO_HR: 'hour', + cls.RESO_DAY: 'day'}.get(reso, 'day') def get_reso_string(reso): diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index c91a1ebd5568f..87272a861d8cf 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1042,9 +1042,6 @@ def intersection(self, other): return self._view_like(left_chunk) def _partial_date_slice(self, reso, parsed): - if not self.is_monotonic: - raise TimeSeriesError('Partial indexing only valid for ordered ' - 'time series.') if reso == 'year': t1 = 
Timestamp(datetime(parsed.year, 1, 1), tz=self.tz) @@ -1079,11 +1076,19 @@ def _partial_date_slice(self, reso, parsed): tz=self.tz).value - 1) else: raise KeyError + stamps = self.asi8 - left = stamps.searchsorted(t1.value, side='left') - right = stamps.searchsorted(t2.value, side='right') - return slice(left, right) + + if self.is_monotonic: + + # a monotonic (sorted) series can be sliced + left = stamps.searchsorted(t1.value, side='left') + right = stamps.searchsorted(t2.value, side='right') + return slice(left, right) + + # try to find a the dates + return np.where((stamps>=t1.value) & (stamps<=t2.value))[0] def _possibly_promote(self, other): if other.inferred_type == 'date': diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 2ec4fd7ffd67b..6155590100452 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -18,6 +18,7 @@ import pandas.core.datetools as datetools import pandas.tseries.offsets as offsets import pandas.tseries.frequencies as fmod +from pandas.tseries.index import TimeSeriesError import pandas as pd from pandas.util.testing import assert_series_equal, assert_almost_equal @@ -168,6 +169,32 @@ def test_indexing_over_size_cutoff(self): finally: _index._SIZE_CUTOFF = old_cutoff + def test_indexing_unordered(self): + + # GH 2437 + from pandas import concat + rng = date_range(start='2011-01-01', end='2011-01-15') + ts = Series(randn(len(rng)), index=rng) + ts2 = concat([ts[0:4],ts[-4:],ts[4:-4]]) + + for t in ts.index: + s = str(t) + expected = ts[t] + result = ts2[t] + self.assertTrue(expected == result) + + result = ts2['2011'].sort_index() + expected = ts['2011'] + assert_series_equal(result,expected) + + # diff freq + rng = date_range(datetime(2005, 1, 1), periods=20, freq='M') + ts = Series(np.arange(len(rng)), index=rng) + ts = ts.take(np.random.permutation(20)) + + result = ts['2005'] + for t in result.index: + self.assertTrue(t.year == 2005) def 
assert_range_equal(left, right): assert(left.equals(right)) @@ -2017,13 +2044,6 @@ def test_partial_slice_minutely(self): self.assert_(s['2005-1-1 23:59:00'] == s.ix[0]) self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00') - def test_partial_not_monotonic(self): - rng = date_range(datetime(2005, 1, 1), periods=20, freq='M') - ts = Series(np.arange(len(rng)), index=rng) - ts = ts.take(np.random.permutation(20)) - - self.assertRaises(Exception, ts.__getitem__, '2005') - def test_date_range_normalize(self): snap = datetime.today() n = 50 From 6bd6dcf3aa3aed34f67f37a140757079f7ada05b Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 22 Mar 2013 09:09:43 -0400 Subject: [PATCH 2/3] ENH: GH3070, extend slicing semantics for datelike indexed DataFrames with a string to work like TimeSeries (e.g. df['2001'] works) --- RELEASE.rst | 12 ++++++++ doc/source/v0.11.0.txt | 12 ++++++++ pandas/core/frame.py | 41 ++++++++++--------------- pandas/core/indexing.py | 24 +++++++++++++++ pandas/tseries/tests/test_timeseries.py | 28 +++++++++++++++-- 5 files changed, 90 insertions(+), 27 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 38b7eef21d8b8..02fd8ebc2ac46 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -98,6 +98,17 @@ pandas 0.11.0 histograms. (GH2710_). - DataFrame.from_records now accepts not only dicts but any instance of the collections.Mapping ABC. + - Allow selection semantics for via a string with a datelike index to work in both + Series and DataFrames (GH3070_) + + .. ipython:: python + + idx = date_range("2001-10-1", periods=5, freq='M') + ts = Series(np.random.rand(len(idx)),index=idx) + ts['2001'] + + df = DataFrame(dict(A = ts)) + df['2001'] **API Changes** @@ -262,6 +273,7 @@ pandas 0.11.0 .. _GH3059: https://github.com/pydata/pandas/issues/3059 .. _GH2993: https://github.com/pydata/pandas/issues/2993 .. _GH3115: https://github.com/pydata/pandas/issues/3115 +.. 
_GH3070: https://github.com/pydata/pandas/issues/3070 pandas 0.10.1 ============= diff --git a/doc/source/v0.11.0.txt b/doc/source/v0.11.0.txt index f13fb50f1aa3c..bef314dfabd2a 100644 --- a/doc/source/v0.11.0.txt +++ b/doc/source/v0.11.0.txt @@ -245,6 +245,17 @@ Enhancements - You can now select timestamps from an *unordered* timeseries similarly to an *ordered* timeseries (GH2437_) + - You can now select with a string from a DataFrame with a datelike index, in a similar way to a Series (GH3070_ + + .. ipython:: python + + idx = date_range("2001-10-1", periods=5, freq='M') + ts = Series(np.random.rand(len(idx)),index=idx) + ts['2001'] + + df = DataFrame(dict(A = ts)) + df['2001'] + - ``Squeeze`` to possibly remove length 1 dimensions from an object. .. ipython:: python @@ -313,3 +324,4 @@ on GitHub for a complete list. .. _GH3011: https://github.com/pydata/pandas/issues/3011 .. _GH3076: https://github.com/pydata/pandas/issues/3076 .. _GH3059: https://github.com/pydata/pandas/issues/3059 +.. 
_GH3070: https://github.com/pydata/pandas/issues/3070 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6ef2ad642612c..b47b77fdaeb6c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -28,7 +28,7 @@ from pandas.core.generic import NDFrame from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels, - _is_index_slice, _check_bool_indexer, + _convert_to_index_sliceable, _check_bool_indexer, _maybe_convert_indices) from pandas.core.internals import BlockManager, make_block, form_blocks from pandas.core.series import Series, _radd_compat @@ -1864,10 +1864,13 @@ def iget_value(self, i, j): return self.iat[i,j] def __getitem__(self, key): - if isinstance(key, slice): - # slice rows - return self._getitem_slice(key) - elif isinstance(key, (np.ndarray, list)): + + # see if we can slice the rows + indexer = _convert_to_index_sliceable(self, key) + if indexer is not None: + return self._getitem_slice(indexer) + + if isinstance(key, (np.ndarray, list)): # either boolean or fancy integer index return self._getitem_array(key) elif isinstance(key, DataFrame): @@ -1879,14 +1882,7 @@ def __getitem__(self, key): return self._get_item_cache(key) def _getitem_slice(self, key): - idx_type = self.index.inferred_type - if idx_type == 'floating': - indexer = self.ix._convert_to_indexer(key, axis=0) - elif idx_type == 'integer' or _is_index_slice(key): - indexer = key - else: - indexer = self.ix._convert_to_indexer(key, axis=0) - return self._slice(indexer, axis=0) + return self._slice(key, axis=0) def _getitem_array(self, key): # also raises Exception if object array with NA values @@ -1982,10 +1978,12 @@ def __setattr__(self, name, value): object.__setattr__(self, name, value) def __setitem__(self, key, value): - if isinstance(key, slice): - # slice rows - self._setitem_slice(key, value) - elif isinstance(key, (np.ndarray, list)): + # see if we can slice the rows + indexer = 
_convert_to_index_sliceable(self, key) + if indexer is not None: + return self._setitem_slice(indexer, value) + + if isinstance(key, (np.ndarray, list)): self._setitem_array(key, value) elif isinstance(key, DataFrame): self._setitem_frame(key, value) @@ -1994,14 +1992,7 @@ def __setitem__(self, key, value): self._set_item(key, value) def _setitem_slice(self, key, value): - idx_type = self.index.inferred_type - if idx_type == 'floating': - indexer = self.ix._convert_to_indexer(key, axis=0) - elif idx_type == 'integer' or _is_index_slice(key): - indexer = key - else: - indexer = self.ix._convert_to_indexer(key, axis=0) - self.ix._setitem_with_indexer(indexer, value) + self.ix._setitem_with_indexer(key, value) def _setitem_array(self, key, value): # also raises Exception if object array with NA values diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 16259fd39c0a9..3d4ac12a4efd7 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -827,6 +827,30 @@ def _convert_key(self, key): _eps = np.finfo('f4').eps +def _convert_to_index_sliceable(obj, key): + """ if we are index sliceable, then return my slicer, otherwise return None """ + idx = obj.index + if isinstance(key, slice): + idx_type = idx.inferred_type + if idx_type == 'floating': + indexer = obj.ix._convert_to_indexer(key, axis=0) + elif idx_type == 'integer' or _is_index_slice(key): + indexer = key + else: + indexer = obj.ix._convert_to_indexer(key, axis=0) + return indexer + + elif isinstance(key, basestring): + + # we need a timelike key here + if idx.is_all_dates: + try: + return idx._get_string_slice(key) + except: + return None + + return None + def _is_index_slice(obj): def _is_valid_index(x): return (com.is_integer(x) or com.is_float(x) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 6155590100452..3ebbd10395784 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -171,7 
+171,7 @@ def test_indexing_over_size_cutoff(self): def test_indexing_unordered(self): - # GH 2437 + # GH 2437 (series) from pandas import concat rng = date_range(start='2011-01-01', end='2011-01-15') ts = Series(randn(len(rng)), index=rng) @@ -196,12 +196,36 @@ def test_indexing_unordered(self): for t in result.index: self.assertTrue(t.year == 2005) + def test_indexing(self): + + idx = date_range("2001-1-1", periods=20, freq='M') + ts = Series(np.random.rand(len(idx)),index=idx) + + # getting + + # GH 3070, make sure semantics work on Series/Frame + expected = ts['2001'] + + df = DataFrame(dict(A = ts)) + result = df['2001']['A'] + assert_series_equal(expected,result) + + # setting + ts['2001'] = 1 + expected = ts['2001'] + + df.loc['2001','A'] = 1 + + result = df['2001']['A'] + assert_series_equal(expected,result) + + + def assert_range_equal(left, right): assert(left.equals(right)) assert(left.freq == right.freq) assert(left.tz == right.tz) - class TestTimeSeries(unittest.TestCase): _multiprocess_can_split_ = True From 13a3334bac261c661de8c1926189333abfc50bae Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 22 Mar 2013 09:15:03 -0400 Subject: [PATCH 3/3] DOC: cookbook examples --- RELEASE.rst | 2 +- doc/source/cookbook.rst | 6 ++++++ doc/source/v0.11.0.txt | 2 +- pandas/tseries/index.py | 2 +- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 02fd8ebc2ac46..8746265dd2e70 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -98,7 +98,7 @@ pandas 0.11.0 histograms. (GH2710_). - DataFrame.from_records now accepts not only dicts but any instance of the collections.Mapping ABC. - - Allow selection semantics for via a string with a datelike index to work in both + - Allow selection semantics via a string with a datelike index to work in both Series and DataFrames (GH3070_) .. 
ipython:: python diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 0a55d78dd24c3..3bc80a36f5561 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -105,6 +105,9 @@ Expanding Data `Alignment and to-date `__ +`Rolling Computation window based on values instead of counts +`__ + Splitting ~~~~~~~~~ @@ -171,6 +174,9 @@ CSV `Reading the first few lines of a frame `__ +`Inferring dtypes from a file +`__ + SQL ~~~ diff --git a/doc/source/v0.11.0.txt b/doc/source/v0.11.0.txt index bef314dfabd2a..87b861a45dbae 100644 --- a/doc/source/v0.11.0.txt +++ b/doc/source/v0.11.0.txt @@ -245,7 +245,7 @@ Enhancements - You can now select timestamps from an *unordered* timeseries similarly to an *ordered* timeseries (GH2437_) - - You can now select with a string from a DataFrame with a datelike index, in a similar way to a Series (GH3070_ + - You can now select with a string from a DataFrame with a datelike index, in a similar way to a Series (GH3070_) .. ipython:: python diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 87272a861d8cf..25c94900d159c 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -1088,7 +1088,7 @@ def _partial_date_slice(self, reso, parsed): return slice(left, right) # try to find a the dates - return np.where((stamps>=t1.value) & (stamps<=t2.value))[0] + return ((stamps>=t1.value) & (stamps<=t2.value)).nonzero()[0] def _possibly_promote(self, other): if other.inferred_type == 'date':