diff --git a/RELEASE.rst b/RELEASE.rst
index c1fa30e23bc5a..8746265dd2e70 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -98,6 +98,17 @@ pandas 0.11.0
     histograms. (GH2710_).
   - DataFrame.from_records now accepts not only dicts but any instance of the
     collections.Mapping ABC.
+  - Allow string selection semantics on a datelike index to work in both
+    Series and DataFrames (GH3070_)
+
+    .. ipython:: python
+
+        idx = date_range("2001-10-1", periods=5, freq='M')
+        ts = Series(np.random.rand(len(idx)),index=idx)
+        ts['2001']
+
+        df = DataFrame(dict(A = ts))
+        df['2001']
 
 **API Changes**
 
@@ -202,6 +213,8 @@ pandas 0.11.0
   - Fixed bug in Timestamp(d,tz=foo) when d is date() rather then datetime() (GH2993_)
   - series.plot(kind='bar') now respects pylab color schem (GH3115_)
   - Fixed bug in reshape if not passed correct input, now raises TypeError (GH2719_)
+  - Allow selection in an *unordered* timeseries to work similarly
+    to an *ordered* timeseries (GH2437_). Fix NameError issue on RESO_US (GH2787_)
 
 .. _GH2758: https://github.com/pydata/pandas/issues/2758
 .. _GH2809: https://github.com/pydata/pandas/issues/2809
@@ -227,6 +240,8 @@ pandas 0.11.0
 .. _GH2751: https://github.com/pydata/pandas/issues/2751
 .. _GH2776: https://github.com/pydata/pandas/issues/2776
 .. _GH2778: https://github.com/pydata/pandas/issues/2778
+.. _GH2437: https://github.com/pydata/pandas/issues/2437
+.. _GH2787: https://github.com/pydata/pandas/issues/2787
 .. _GH2793: https://github.com/pydata/pandas/issues/2793
 .. _GH2795: https://github.com/pydata/pandas/issues/2795
 .. _GH2819: https://github.com/pydata/pandas/issues/2819
@@ -258,6 +273,7 @@ pandas 0.11.0
 .. _GH3059: https://github.com/pydata/pandas/issues/3059
 .. _GH2993: https://github.com/pydata/pandas/issues/2993
 .. _GH3115: https://github.com/pydata/pandas/issues/3115
+.. _GH3070: https://github.com/pydata/pandas/issues/3070
 
 pandas 0.10.1
 =============
diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst
index 0a55d78dd24c3..3bc80a36f5561 100644
--- a/doc/source/cookbook.rst
+++ b/doc/source/cookbook.rst
@@ -105,6 +105,9 @@ Expanding Data
 `Alignment and to-date
 `__
 
+`Rolling Computation window based on values instead of counts
+`__
+
 Splitting
 ~~~~~~~~~
 
@@ -171,6 +174,9 @@ CSV
 `Reading the first few lines of a frame
 `__
 
+`Inferring dtypes from a file
+`__
+
 SQL
 ~~~
 
diff --git a/doc/source/v0.11.0.txt b/doc/source/v0.11.0.txt
index eba37c02c6237..87b861a45dbae 100644
--- a/doc/source/v0.11.0.txt
+++ b/doc/source/v0.11.0.txt
@@ -243,6 +243,19 @@ Enhancements
   - In ``HDFStore``, new keywords ``iterator=boolean``, and ``chunksize=number_in_a_chunk`` are
     provided to support iteration on ``select`` and ``select_as_multiple`` (GH3076_)
 
+  - You can now select timestamps from an *unordered* timeseries similarly to an *ordered* timeseries (GH2437_)
+
+  - You can now select with a string from a DataFrame with a datelike index, similarly to a Series (GH3070_)
+
+    .. ipython:: python
+
+        idx = date_range("2001-10-1", periods=5, freq='M')
+        ts = Series(np.random.rand(len(idx)),index=idx)
+        ts['2001']
+
+        df = DataFrame(dict(A = ts))
+        df['2001']
+
   - ``Squeeze`` to possibly remove length 1 dimensions from an object.
 
     .. ipython:: python
@@ -293,6 +306,7 @@ See the `full release notes
 `__ or issue tracker
 on GitHub for a complete list.
 
+.. _GH2437: https://github.com/pydata/pandas/issues/2437
 .. _GH2809: https://github.com/pydata/pandas/issues/2809
 .. _GH2810: https://github.com/pydata/pandas/issues/2810
 .. _GH2837: https://github.com/pydata/pandas/issues/2837
@@ -310,3 +324,4 @@ on GitHub for a complete list.
 .. _GH3011: https://github.com/pydata/pandas/issues/3011
 .. _GH3076: https://github.com/pydata/pandas/issues/3076
 .. _GH3059: https://github.com/pydata/pandas/issues/3059
+.. _GH3070: https://github.com/pydata/pandas/issues/3070
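
As a quick illustration of the GH2437 entry above, partial-string selection now also works when the datetime index is not sorted. This is only a sketch of the behaviour the patch enables (the variable names are arbitrary and mirror the new test further below; they are not part of the patch itself):

    import numpy as np
    from pandas import Series, date_range

    # a monthly series, shuffled so the DatetimeIndex is no longer monotonic
    rng = date_range('2005-01-01', periods=20, freq='M')
    ts = Series(np.arange(len(rng)), index=rng)
    ts = ts.take(np.random.permutation(20))

    # previously this raised; it now returns every timestamp that falls in 2005
    ts['2005']
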
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6ef2ad642612c..b47b77fdaeb6c 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -28,7 +28,7 @@
 from pandas.core.generic import NDFrame
 from pandas.core.index import Index, MultiIndex, _ensure_index
 from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
-                                  _is_index_slice, _check_bool_indexer,
+                                  _convert_to_index_sliceable, _check_bool_indexer,
                                   _maybe_convert_indices)
 from pandas.core.internals import BlockManager, make_block, form_blocks
 from pandas.core.series import Series, _radd_compat
@@ -1864,10 +1864,13 @@ def iget_value(self, i, j):
         return self.iat[i,j]
 
     def __getitem__(self, key):
-        if isinstance(key, slice):
-            # slice rows
-            return self._getitem_slice(key)
-        elif isinstance(key, (np.ndarray, list)):
+
+        # see if we can slice the rows
+        indexer = _convert_to_index_sliceable(self, key)
+        if indexer is not None:
+            return self._getitem_slice(indexer)
+
+        if isinstance(key, (np.ndarray, list)):
             # either boolean or fancy integer index
             return self._getitem_array(key)
         elif isinstance(key, DataFrame):
@@ -1879,14 +1882,7 @@ def __getitem__(self, key):
             return self._get_item_cache(key)
 
     def _getitem_slice(self, key):
-        idx_type = self.index.inferred_type
-        if idx_type == 'floating':
-            indexer = self.ix._convert_to_indexer(key, axis=0)
-        elif idx_type == 'integer' or _is_index_slice(key):
-            indexer = key
-        else:
-            indexer = self.ix._convert_to_indexer(key, axis=0)
-        return self._slice(indexer, axis=0)
+        return self._slice(key, axis=0)
 
     def _getitem_array(self, key):
         # also raises Exception if object array with NA values
@@ -1982,10 +1978,12 @@ def __setattr__(self, name, value):
             object.__setattr__(self, name, value)
 
     def __setitem__(self, key, value):
-        if isinstance(key, slice):
-            # slice rows
-            self._setitem_slice(key, value)
-        elif isinstance(key, (np.ndarray, list)):
+        # see if we can slice the rows
+        indexer = _convert_to_index_sliceable(self, key)
+        if indexer is not None:
+            return self._setitem_slice(indexer, value)
+
+        if isinstance(key, (np.ndarray, list)):
             self._setitem_array(key, value)
         elif isinstance(key, DataFrame):
             self._setitem_frame(key, value)
@@ -1994,14 +1992,7 @@ def __setitem__(self, key, value):
             self._set_item(key, value)
 
     def _setitem_slice(self, key, value):
-        idx_type = self.index.inferred_type
-        if idx_type == 'floating':
-            indexer = self.ix._convert_to_indexer(key, axis=0)
-        elif idx_type == 'integer' or _is_index_slice(key):
-            indexer = key
-        else:
-            indexer = self.ix._convert_to_indexer(key, axis=0)
-        self.ix._setitem_with_indexer(indexer, value)
+        self.ix._setitem_with_indexer(key, value)
 
     def _setitem_array(self, key, value):
         # also raises Exception if object array with NA values
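
For reference, a rough sketch of the lookup order the ``__getitem__``/``__setitem__`` rewrite above produces (illustrative only; the frame below is an assumed example, not part of the patch). A key is first offered to ``_convert_to_index_sliceable``; only if that returns ``None`` does it fall through to the existing boolean/fancy/column paths:

    import numpy as np
    from pandas import DataFrame, date_range

    idx = date_range('2001-10-01', periods=5, freq='M')
    df = DataFrame({'A': np.random.rand(len(idx))}, index=idx)

    df[:2]        # a plain slice still slices rows
    df['2001']    # a datelike string now resolves to a row slice as well
    df['A']       # anything that is not index-sliceable falls back to column lookup
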
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 16259fd39c0a9..3d4ac12a4efd7 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -827,6 +827,30 @@ def _convert_key(self, key):
 
 _eps = np.finfo('f4').eps
 
+def _convert_to_index_sliceable(obj, key):
+    """ if we are index sliceable, then return my slicer, otherwise return None """
+    idx = obj.index
+    if isinstance(key, slice):
+        idx_type = idx.inferred_type
+        if idx_type == 'floating':
+            indexer = obj.ix._convert_to_indexer(key, axis=0)
+        elif idx_type == 'integer' or _is_index_slice(key):
+            indexer = key
+        else:
+            indexer = obj.ix._convert_to_indexer(key, axis=0)
+        return indexer
+
+    elif isinstance(key, basestring):
+
+        # we need a timelike key here
+        if idx.is_all_dates:
+            try:
+                return idx._get_string_slice(key)
+            except:
+                return None
+
+    return None
+
 def _is_index_slice(obj):
     def _is_valid_index(x):
         return (com.is_integer(x) or com.is_float(x)
diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py
index 3bf29af8581a9..a43c80bf22158 100644
--- a/pandas/tseries/frequencies.py
+++ b/pandas/tseries/frequencies.py
@@ -34,11 +34,11 @@ class Resolution(object):
 
     @classmethod
     def get_str(cls, reso):
-        return {RESO_US: 'microsecond',
-                RESO_SEC: 'second',
-                RESO_MIN: 'minute',
-                RESO_HR: 'hour',
-                RESO_DAY: 'day'}.get(reso, 'day')
+        return {cls.RESO_US: 'microsecond',
+                cls.RESO_SEC: 'second',
+                cls.RESO_MIN: 'minute',
+                cls.RESO_HR: 'hour',
+                cls.RESO_DAY: 'day'}.get(reso, 'day')
 
 
 def get_reso_string(reso):
diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py
index c91a1ebd5568f..25c94900d159c 100644
--- a/pandas/tseries/index.py
+++ b/pandas/tseries/index.py
@@ -1042,9 +1042,6 @@ def intersection(self, other):
         return self._view_like(left_chunk)
 
     def _partial_date_slice(self, reso, parsed):
-        if not self.is_monotonic:
-            raise TimeSeriesError('Partial indexing only valid for ordered '
-                                  'time series.')
 
         if reso == 'year':
             t1 = Timestamp(datetime(parsed.year, 1, 1), tz=self.tz)
@@ -1079,11 +1076,19 @@
                                tz=self.tz).value - 1)
         else:
             raise KeyError
+
         stamps = self.asi8
-        left = stamps.searchsorted(t1.value, side='left')
-        right = stamps.searchsorted(t2.value, side='right')
-        return slice(left, right)
+
+        if self.is_monotonic:
+
+            # a monotonic (sorted) series can be sliced
+            left = stamps.searchsorted(t1.value, side='left')
+            right = stamps.searchsorted(t2.value, side='right')
+            return slice(left, right)
+
+        # try to find the dates
+        return ((stamps>=t1.value) & (stamps<=t2.value)).nonzero()[0]
 
     def _possibly_promote(self, other):
         if other.inferred_type == 'date':
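
The two branches that ``_partial_date_slice`` now takes can be sketched with plain numpy (toy int64 values standing in for the ``asi8`` timestamps and the parsed period bounds; not actual pandas code):

    import numpy as np

    stamps = np.array([40, 10, 30, 20, 50], dtype='int64')
    t1, t2 = 15, 45                                   # bounds of the requested period

    # monotonic index: the matches are contiguous, so a slice is enough
    ordered = np.sort(stamps)
    left = ordered.searchsorted(t1, side='left')      # 1
    right = ordered.searchsorted(t2, side='right')    # 4
    ordered[left:right]                               # array([20, 30, 40])

    # unordered index: fall back to the integer positions of the matches
    ((stamps >= t1) & (stamps <= t2)).nonzero()[0]    # array([0, 2, 3])

Returning positions rather than a slice in the unordered case is what lets the selection come back in the original, unsorted order of the series (which is why the new test sorts before comparing).
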
diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py
index 2ec4fd7ffd67b..3ebbd10395784 100644
--- a/pandas/tseries/tests/test_timeseries.py
+++ b/pandas/tseries/tests/test_timeseries.py
@@ -18,6 +18,7 @@
 import pandas.core.datetools as datetools
 import pandas.tseries.offsets as offsets
 import pandas.tseries.frequencies as fmod
+from pandas.tseries.index import TimeSeriesError
 import pandas as pd
 
 from pandas.util.testing import assert_series_equal, assert_almost_equal
@@ -168,13 +169,63 @@ def test_indexing_over_size_cutoff(self):
         finally:
             _index._SIZE_CUTOFF = old_cutoff
 
+    def test_indexing_unordered(self):
+
+        # GH 2437 (series)
+        from pandas import concat
+        rng = date_range(start='2011-01-01', end='2011-01-15')
+        ts = Series(randn(len(rng)), index=rng)
+        ts2 = concat([ts[0:4],ts[-4:],ts[4:-4]])
+
+        for t in ts.index:
+            s = str(t)
+            expected = ts[t]
+            result = ts2[t]
+            self.assertTrue(expected == result)
+
+        result = ts2['2011'].sort_index()
+        expected = ts['2011']
+        assert_series_equal(result,expected)
+
+        # diff freq
+        rng = date_range(datetime(2005, 1, 1), periods=20, freq='M')
+        ts = Series(np.arange(len(rng)), index=rng)
+        ts = ts.take(np.random.permutation(20))
+
+        result = ts['2005']
+        for t in result.index:
+            self.assertTrue(t.year == 2005)
+
+    def test_indexing(self):
+
+        idx = date_range("2001-1-1", periods=20, freq='M')
+        ts = Series(np.random.rand(len(idx)),index=idx)
+
+        # getting
+
+        # GH 3070, make sure semantics work on Series/Frame
+        expected = ts['2001']
+
+        df = DataFrame(dict(A = ts))
+        result = df['2001']['A']
+        assert_series_equal(expected,result)
+
+        # setting
+        ts['2001'] = 1
+        expected = ts['2001']
+
+        df.loc['2001','A'] = 1
+
+        result = df['2001']['A']
+        assert_series_equal(expected,result)
+
+
 def assert_range_equal(left, right):
     assert(left.equals(right))
     assert(left.freq == right.freq)
     assert(left.tz == right.tz)
 
-
 class TestTimeSeries(unittest.TestCase):
     _multiprocess_can_split_ = True
@@ -2017,13 +2068,6 @@ def test_partial_slice_minutely(self):
         self.assert_(s['2005-1-1 23:59:00'] == s.ix[0])
         self.assertRaises(Exception, s.__getitem__, '2004-12-31 00:00:00')
 
-    def test_partial_not_monotonic(self):
-        rng = date_range(datetime(2005, 1, 1), periods=20, freq='M')
-        ts = Series(np.arange(len(rng)), index=rng)
-        ts = ts.take(np.random.permutation(20))
-
-        self.assertRaises(Exception, ts.__getitem__, '2005')
-
     def test_date_range_normalize(self):
         snap = datetime.today()
         n = 50
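
Finally, a usage-level sketch of the ``Resolution.get_str`` fix included above (GH2787): the lookup table now references the constants through ``cls``, so calling the classmethod no longer hits a ``NameError`` on ``RESO_US``. The calls below are illustrative only and assume this patch is applied:

    from pandas.tseries.frequencies import Resolution

    Resolution.get_str(Resolution.RESO_US)    # 'microsecond'
    Resolution.get_str(Resolution.RESO_DAY)   # 'day'
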