Skip to content

ENH: GH3070 allow string selection on a DataFrame with a datelike index, to have partial_string semantics (like Series) #3137

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 22, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,17 @@ pandas 0.11.0
histograms. (GH2710_).
- DataFrame.from_records now accepts not only dicts but any instance of
the collections.Mapping ABC.
- Allow partial-string selection (e.g. ``ts['2001']``) on a datelike index to work
for both Series and DataFrames (GH3070_)

.. ipython:: python

idx = date_range("2001-10-1", periods=5, freq='M')
ts = Series(np.random.rand(len(idx)),index=idx)
ts['2001']

df = DataFrame(dict(A = ts))
df['2001']


**API Changes**
Expand Down Expand Up @@ -263,6 +274,7 @@ pandas 0.11.0
.. _GH3059: https://github.com/pydata/pandas/issues/3059
.. _GH2993: https://github.com/pydata/pandas/issues/2993
.. _GH3115: https://github.com/pydata/pandas/issues/3115
.. _GH3070: https://github.com/pydata/pandas/issues/3070

pandas 0.10.1
=============
Expand Down
6 changes: 6 additions & 0 deletions doc/source/cookbook.rst
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ Expanding Data
`Alignment and to-date
<http://stackoverflow.com/questions/15489011/python-time-series-alignment-and-to-date-functions>`__

`Rolling Computation window based on values instead of counts
<http://stackoverflow.com/questions/14300768/pandas-rolling-computation-with-window-based-on-values-instead-of-counts>`__

Splitting
~~~~~~~~~

Expand Down Expand Up @@ -171,6 +174,9 @@ CSV
`Reading the first few lines of a frame
<http://stackoverflow.com/questions/15008970/way-to-read-first-few-lines-for-pandas-dataframe>`__

`Inferring dtypes from a file
<http://stackoverflow.com/questions/15555005/get-inferred-dataframe-types-iteratively-using-chunksize>`__

SQL
~~~

Expand Down
12 changes: 12 additions & 0 deletions doc/source/v0.11.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,17 @@ Enhancements

- You can now select timestamps from an *unordered* timeseries similarly to an *ordered* timeseries (GH2437_)

- You can now select rows from a DataFrame with a datelike index using a partial date string, just as you can with a Series (GH3070_)

.. ipython:: python

idx = date_range("2001-10-1", periods=5, freq='M')
ts = Series(np.random.rand(len(idx)),index=idx)
ts['2001']

df = DataFrame(dict(A = ts))
df['2001']

- ``Squeeze`` to possibly remove length 1 dimensions from an object.

.. ipython:: python
Expand Down Expand Up @@ -313,3 +324,4 @@ on GitHub for a complete list.
.. _GH3011: https://github.com/pydata/pandas/issues/3011
.. _GH3076: https://github.com/pydata/pandas/issues/3076
.. _GH3059: https://github.com/pydata/pandas/issues/3059
.. _GH3070: https://github.com/pydata/pandas/issues/3070
41 changes: 16 additions & 25 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from pandas.core.generic import NDFrame
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.indexing import (_NDFrameIndexer, _maybe_droplevels,
_is_index_slice, _check_bool_indexer,
_convert_to_index_sliceable, _check_bool_indexer,
_maybe_convert_indices)
from pandas.core.internals import BlockManager, make_block, form_blocks
from pandas.core.series import Series, _radd_compat
Expand Down Expand Up @@ -1864,10 +1864,13 @@ def iget_value(self, i, j):
return self.iat[i,j]

def __getitem__(self, key):
if isinstance(key, slice):
# slice rows
return self._getitem_slice(key)
elif isinstance(key, (np.ndarray, list)):

# see if we can slice the rows
indexer = _convert_to_index_sliceable(self, key)
if indexer is not None:
return self._getitem_slice(indexer)

if isinstance(key, (np.ndarray, list)):
# either boolean or fancy integer index
return self._getitem_array(key)
elif isinstance(key, DataFrame):
Expand All @@ -1879,14 +1882,7 @@ def __getitem__(self, key):
return self._get_item_cache(key)

def _getitem_slice(self, key):
idx_type = self.index.inferred_type
if idx_type == 'floating':
indexer = self.ix._convert_to_indexer(key, axis=0)
elif idx_type == 'integer' or _is_index_slice(key):
indexer = key
else:
indexer = self.ix._convert_to_indexer(key, axis=0)
return self._slice(indexer, axis=0)
return self._slice(key, axis=0)

def _getitem_array(self, key):
# also raises Exception if object array with NA values
Expand Down Expand Up @@ -1982,10 +1978,12 @@ def __setattr__(self, name, value):
object.__setattr__(self, name, value)

def __setitem__(self, key, value):
if isinstance(key, slice):
# slice rows
self._setitem_slice(key, value)
elif isinstance(key, (np.ndarray, list)):
# see if we can slice the rows
indexer = _convert_to_index_sliceable(self, key)
if indexer is not None:
return self._setitem_slice(indexer, value)

if isinstance(key, (np.ndarray, list)):
self._setitem_array(key, value)
elif isinstance(key, DataFrame):
self._setitem_frame(key, value)
Expand All @@ -1994,14 +1992,7 @@ def __setitem__(self, key, value):
self._set_item(key, value)

def _setitem_slice(self, key, value):
idx_type = self.index.inferred_type
if idx_type == 'floating':
indexer = self.ix._convert_to_indexer(key, axis=0)
elif idx_type == 'integer' or _is_index_slice(key):
indexer = key
else:
indexer = self.ix._convert_to_indexer(key, axis=0)
self.ix._setitem_with_indexer(indexer, value)
self.ix._setitem_with_indexer(key, value)

def _setitem_array(self, key, value):
# also raises Exception if object array with NA values
Expand Down
24 changes: 24 additions & 0 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,30 @@ def _convert_key(self, key):
_eps = np.finfo('f4').eps


def _convert_to_index_sliceable(obj, key):
""" if we are index sliceable, then return my slicer, otherwise return None """
idx = obj.index
if isinstance(key, slice):
idx_type = idx.inferred_type
if idx_type == 'floating':
indexer = obj.ix._convert_to_indexer(key, axis=0)
elif idx_type == 'integer' or _is_index_slice(key):
indexer = key
else:
indexer = obj.ix._convert_to_indexer(key, axis=0)
return indexer

elif isinstance(key, basestring):

# we need a timelike key here
if idx.is_all_dates:
try:
return idx._get_string_slice(key)
except:
return None

return None

def _is_index_slice(obj):
def _is_valid_index(x):
return (com.is_integer(x) or com.is_float(x)
Expand Down
24 changes: 23 additions & 1 deletion pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,12 +196,34 @@ def test_indexing_unordered(self):
for t in result.index:
self.assertTrue(t.year == 2005)

def test_indexing(self):
    """GH 3070: string selection on a datelike index must give matching
    results for a Series and for a DataFrame built from that Series."""
    index = date_range("2001-1-1", periods=20, freq='M')
    ts = Series(np.random.rand(len(index)), index=index)

    # getting: the frame's 'A' column, selected by year string, must equal
    # the same selection made directly on the series
    expected = ts['2001']
    df = DataFrame({'A': ts})
    assert_series_equal(expected, df['2001']['A'])

    # setting: write through the series and through the frame, then
    # compare the same selections again
    ts['2001'] = 1
    expected = ts['2001']
    df.loc['2001', 'A'] = 1
    assert_series_equal(expected, df['2001']['A'])

def assert_range_equal(left, right):
    """Assert that two ranges are equal per ``left.equals(right)`` and
    that their ``freq`` and ``tz`` attributes compare equal."""
    assert left.equals(right)
    # checked in the same order as before: freq first, then tz
    for attr in ('freq', 'tz'):
        assert getattr(left, attr) == getattr(right, attr)


class TestTimeSeries(unittest.TestCase):
_multiprocess_can_split_ = True

Expand Down