From d3d24fa1687f605b71c1e5bc31f6abfd84a89156 Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 22 Mar 2013 09:09:43 -0400 Subject: [PATCH 1/2] ENH: GH3070, extend slicing semantics for datelike indexed DataFrames with a string to work like TimeSeries (e.g. df['2001'] works) --- RELEASE.rst | 12 ++++++++ doc/source/v0.11.0.txt | 12 ++++++++ pandas/core/frame.py | 41 ++++++++++--------------- pandas/core/indexing.py | 24 +++++++++++++++ pandas/tseries/tests/test_timeseries.py | 24 ++++++++++++++- 5 files changed, 87 insertions(+), 26 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 65411b9b69cad..51a956aaa9cc0 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -98,6 +98,17 @@ pandas 0.11.0 histograms. (GH2710_). - DataFrame.from_records now accepts not only dicts but any instance of the collections.Mapping ABC. + - Allow selection semantics for via a string with a datelike index to work in both + Series and DataFrames (GH3070_) + + .. ipython:: python + + idx = date_range("2001-10-1", periods=5, freq='M') + ts = Series(np.random.rand(len(idx)),index=idx) + ts['2001'] + + df = DataFrame(dict(A = ts)) + df['2001'] **API Changes** @@ -263,6 +274,7 @@ pandas 0.11.0 .. _GH3059: https://github.com/pydata/pandas/issues/3059 .. _GH2993: https://github.com/pydata/pandas/issues/2993 .. _GH3115: https://github.com/pydata/pandas/issues/3115 +.. _GH3070: https://github.com/pydata/pandas/issues/3070 pandas 0.10.1 ============= diff --git a/doc/source/v0.11.0.txt b/doc/source/v0.11.0.txt index f13fb50f1aa3c..87b861a45dbae 100644 --- a/doc/source/v0.11.0.txt +++ b/doc/source/v0.11.0.txt @@ -245,6 +245,17 @@ Enhancements - You can now select timestamps from an *unordered* timeseries similarly to an *ordered* timeseries (GH2437_) + - You can now select with a string from a DataFrame with a datelike index, in a similar way to a Series (GH3070_) + + .. ipython:: python + + idx = date_range("2001-10-1", periods=5, freq='M') + ts = Series(np.random.rand(len(idx)),index=idx) + ts['2001'] + + df = DataFrame(dict(A = ts)) + df['2001'] + - ``Squeeze`` to possibly remove length 1 dimensions from an object. .. ipython:: python @@ -313,3 +324,4 @@ on GitHub for a complete list. .. _GH3011: https://github.com/pydata/pandas/issues/3011 .. _GH3076: https://github.com/pydata/pandas/issues/3076 .. _GH3059: https://github.com/pydata/pandas/issues/3059 +.. _GH3070: https://github.com/pydata/pandas/issues/3070 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6ef2ad642612c..b47b77fdaeb6c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -28,7 +28,7 @@ from pandas.core.generic import NDFrame from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels, - _is_index_slice, _check_bool_indexer, + _convert_to_index_sliceable, _check_bool_indexer, _maybe_convert_indices) from pandas.core.internals import BlockManager, make_block, form_blocks from pandas.core.series import Series, _radd_compat @@ -1864,10 +1864,13 @@ def iget_value(self, i, j): return self.iat[i,j] def __getitem__(self, key): - if isinstance(key, slice): - # slice rows - return self._getitem_slice(key) - elif isinstance(key, (np.ndarray, list)): + + # see if we can slice the rows + indexer = _convert_to_index_sliceable(self, key) + if indexer is not None: + return self._getitem_slice(indexer) + + if isinstance(key, (np.ndarray, list)): # either boolean or fancy integer index return self._getitem_array(key) elif isinstance(key, DataFrame): @@ -1879,14 +1882,7 @@ def __getitem__(self, key): return self._get_item_cache(key) def _getitem_slice(self, key): - idx_type = self.index.inferred_type - if idx_type == 'floating': - indexer = self.ix._convert_to_indexer(key, axis=0) - elif idx_type == 'integer' or _is_index_slice(key): - indexer = key - else: - indexer = self.ix._convert_to_indexer(key, axis=0) - return self._slice(indexer, axis=0) + return self._slice(key, axis=0) def _getitem_array(self, key): # also raises Exception if object array with NA values @@ -1982,10 +1978,12 @@ def __setattr__(self, name, value): object.__setattr__(self, name, value) def __setitem__(self, key, value): - if isinstance(key, slice): - # slice rows - self._setitem_slice(key, value) - elif isinstance(key, (np.ndarray, list)): + # see if we can slice the rows + indexer = _convert_to_index_sliceable(self, key) + if indexer is not None: + return self._setitem_slice(indexer, value) + + if isinstance(key, (np.ndarray, list)): self._setitem_array(key, value) elif isinstance(key, DataFrame): self._setitem_frame(key, value) @@ -1994,14 +1992,7 @@ def __setitem__(self, key, value): self._set_item(key, value) def _setitem_slice(self, key, value): - idx_type = self.index.inferred_type - if idx_type == 'floating': - indexer = self.ix._convert_to_indexer(key, axis=0) - elif idx_type == 'integer' or _is_index_slice(key): - indexer = key - else: - indexer = self.ix._convert_to_indexer(key, axis=0) - self.ix._setitem_with_indexer(indexer, value) + self.ix._setitem_with_indexer(key, value) def _setitem_array(self, key, value): # also raises Exception if object array with NA values diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 16259fd39c0a9..3d4ac12a4efd7 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -827,6 +827,30 @@ def _convert_key(self, key): _eps = np.finfo('f4').eps +def _convert_to_index_sliceable(obj, key): + """ if we are index sliceable, then return my slicer, otherwise return None """ + idx = obj.index + if isinstance(key, slice): + idx_type = idx.inferred_type + if idx_type == 'floating': + indexer = obj.ix._convert_to_indexer(key, axis=0) + elif idx_type == 'integer' or _is_index_slice(key): + indexer = key + else: + indexer = obj.ix._convert_to_indexer(key, axis=0) + return indexer + + elif isinstance(key, basestring): + + # we need a timelike key here + if idx.is_all_dates: + try: + return idx._get_string_slice(key) + except: + return None + + return None + def _is_index_slice(obj): def _is_valid_index(x): return (com.is_integer(x) or com.is_float(x) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 6155590100452..43c81934aa182 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -196,12 +196,34 @@ def test_indexing_unordered(self): for t in result.index: self.assertTrue(t.year == 2005) + def test_indexing(self): + + idx = date_range("2001-1-1", periods=20, freq='M') + ts = Series(np.random.rand(len(idx)),index=idx) + + # getting + + # GH 3070, make sure semantics work on Series/Frame + expected = ts['2001'] + + df = DataFrame(dict(A = ts)) + result = df['2001']['A'] + assert_series_equal(expected,result) + + # setting + ts['2001'] = 1 + expected = ts['2001'] + + df.loc['2001','A'] = 1 + + result = df['2001']['A'] + assert_series_equal(expected,result) + def assert_range_equal(left, right): assert(left.equals(right)) assert(left.freq == right.freq) assert(left.tz == right.tz) - class TestTimeSeries(unittest.TestCase): _multiprocess_can_split_ = True From 914625c4755cc4ba387143785e9f954321062340 Mon Sep 17 00:00:00 2001 From: jreback Date: Fri, 22 Mar 2013 09:15:03 -0400 Subject: [PATCH 2/2] DOC: cookbook examples --- RELEASE.rst | 2 +- doc/source/cookbook.rst | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/RELEASE.rst b/RELEASE.rst index 51a956aaa9cc0..224a9846bf864 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -98,7 +98,7 @@ pandas 0.11.0 histograms. (GH2710_). - DataFrame.from_records now accepts not only dicts but any instance of the collections.Mapping ABC. - - Allow selection semantics for via a string with a datelike index to work in both + - Allow selection semantics via a string with a datelike index to work in both Series and DataFrames (GH3070_) .. ipython:: python diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 0a55d78dd24c3..3bc80a36f5561 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -105,6 +105,9 @@ Expanding Data `Alignment and to-date `__ +`Rolling Computation window based on values instead of counts +`__ + Splitting ~~~~~~~~~ @@ -171,6 +174,9 @@ CSV `Reading the first few lines of a frame `__ +`Inferring dtypes from a file +`__ + SQL ~~~