diff --git a/doc/source/v0.11.0.txt b/doc/source/v0.11.0.txt index 60ec7de5c4d8e..69fb5ed06a722 100644 --- a/doc/source/v0.11.0.txt +++ b/doc/source/v0.11.0.txt @@ -274,13 +274,15 @@ Enhancements - added option `display.chop_threshold` to control display of small numerical values. (GH2739_) + - added access to rows/columns via attributes, supporting tab-completion + in IPython, under `x.r` (rows) and `x.c` (columns). Originally GH1904_. + - added option `display.max_info_rows` to prevent verbose_info from being calculated for frames above 1M rows (configurable). (GH2807_, GH2918_) - value_counts() now accepts a "normalize" argument, for normalized histograms. (GH2710_). - Bug Fixes ~~~~~~~~~ @@ -327,6 +329,7 @@ on GitHub for a complete list. .. _GH2898: https://github.com/pydata/pandas/issues/2898 .. _GH2978: https://github.com/pydata/pandas/issues/2978 .. _GH2739: https://github.com/pydata/pandas/issues/2739 +.. _GH1904: https://github.com/pydata/pandas/issues/1904 .. _GH2710: https://github.com/pydata/pandas/issues/2710 .. _GH2806: https://github.com/pydata/pandas/issues/2806 .. _GH2807: https://github.com/pydata/pandas/issues/2807 diff --git a/pandas/core/common.py b/pandas/core/common.py index a3e8c09839891..565bdc1c9c582 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1206,6 +1206,92 @@ def _long_prod(vals): return result +class PropertyLookupHelper(object): + """Util class for attribute access (and tab-completion) to pandas data objs + + list_func: f(parent, limit,axis) -> list of keys, no longer than limit if it's an int + get_func: f(parent,key,axis) -> data associated with key on given axis + set_func: f(parent,key, val,axis) -> sets the data associated with key on given axis to val + parent: an object. In pandas this will be series/frame + + Example use (from Series class): + + class Series(object): + ...
+ @property + def r(self): + return com.PropertyLookupHelper(self, + axis=0, + limit=1000000) + + >>> s = Series(range(3),index=['a','b','c']) + >>> dir(s.r) # the names tab-completion offers after typing `s.r.` + ['a','b','c'] + >>> s.r.a + 0 + + """ + def __init__(self,parent, list_func=None, get_func=None, + set_func=None, axis=None, limit=None): + + def __list_func(parent, limit, axis): + base = parent.axes[axis] + try: # MultiIndex + base = base.levels [0] + except AttributeError: # no `levels` + pass + + if limit: + base = base[:limit] + + return list(base) + + def __get_func(parent, k,axis): + s=tuple([slice(None)]*(axis)+[k]) + s = s[0] if len(s) == 1 else s + return parent.ix[s] + + def __set_func(parent,k,v,axis): + # convert pandas objects (e.g. a single row dataframe) to ndarray + # so that, with a row MultiIndex, df.r.a = df.r.b does what you'd expect + try: + v = v.values + except: + pass + + s=tuple([slice(None)]*(axis)+[k]) + s = s[0] if len(s) == 1 else s + parent.ix[s] = v + + if axis is None and \ + (list_func is None or get_func is None or set_func is None): + raise AssertionError("When using default list/get/set, you must specify axis") + + super(PropertyLookupHelper,self).__setattr__('axis',axis) + super(PropertyLookupHelper,self).__setattr__('parent',parent) + super(PropertyLookupHelper,self).__setattr__('list_func',list_func or __list_func) + super(PropertyLookupHelper,self).__setattr__('get_func',get_func or __get_func) + super(PropertyLookupHelper,self).__setattr__('set_func',set_func or __set_func) + super(PropertyLookupHelper,self).__setattr__('limit',limit) + + def __getattr__(self,k): + get_func = super(PropertyLookupHelper,self).__getattribute__('get_func') + axis = super(PropertyLookupHelper,self).__getattribute__('axis') + return get_func(self.parent,k,axis) + + def __setattr__(self,k,val): + self.set_func(self.parent, k, val, self.axis) + + def __dir__(self): + import tokenize, keyword, re + axis = super(PropertyLookupHelper,self).__getattribute__('axis') + return [x for x in
self.list_func(self.parent,self.limit,axis) + if isinstance(x,basestring) and + bool(re.match('^' + tokenize.Name + '$', x)) and + not keyword.iskeyword(x) and + not x == 'print' ] # PY3 + + class groupby(dict): """ A simple groupby different from the one in itertools. diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ee586a2101f62..b93f29f246d62 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1893,6 +1893,14 @@ def set_value(self, index, col, value): return result.set_value(index, col, value) + @property + def r(self): + return com.PropertyLookupHelper(self, axis=0,limit=1000000) + + @property + def c(self): + return com.PropertyLookupHelper(self, axis=1,limit=1000000) + def irow(self, i, copy=False): return self._ixs(i,axis=0) diff --git a/pandas/core/series.py b/pandas/core/series.py index c6fe396b08867..36349437ae328 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -559,6 +559,10 @@ def ix(self): return self._ix + @property + def r(self): + return com.PropertyLookupHelper(self,axis=0,limit=1000000) + def _xs(self, key, axis=0, level=None, copy=True): return self.__getitem__(key) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 1c30dfd1abced..1224066a56ace 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -26,7 +26,8 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, - assert_frame_equal) + assert_frame_equal, + makeCustomDataframe as mkdf) from pandas.util import py3compat from pandas.util.compat import OrderedDict @@ -1557,6 +1558,73 @@ def test_iget_value(self): expected = self.frame.get_value(row, col) assert_almost_equal(result, expected) + def test_attrib_data_access(self): + df = mkdf(10,5) + self.assertEqual(len(dir(df.r)), 10) + r = df.r.R_l0_g4 + assert_series_equal(r, df.irow(4)) + + c = df.c.C_l0_g2 + self.assertEqual(len(dir(df.c)), 5) + assert_series_equal(c, df.icol(2)) + + # exclude keywords and illegal identifiers + df = 
DataFrame([[1,2,3],[3,4,5],[6,7,8],[9,10,11]], + index=['a','print','a,b','foo bar'], + columns=['yield','b','c,d']) + self.assertEqual(len(dir(df.r)), 1) + self.assertEqual(len(dir(df.c)), 1) + self.assertTrue('print' not in dir(df.r)) + self.assertTrue('a,b' not in dir(df.r)) + self.assertTrue('yield' not in dir(df.c)) + self.assertTrue('c,d' not in dir(df.c)) + + # recursive access + df=mkdf(4,2,c_idx_nlevels=2,r_idx_nlevels=2) + assert_array_equal(df.r.R_l0_g0.r.R_l1_g0.values, + df.ix['R_l0_g0', 'R_l1_g0'].values) + assert_array_equal(df.c.C_l0_g0.c.C_l1_g0.values, + df.xs(('C_l0_g0','C_l1_g0'),1).values) + + # test setting rows + df=mkdf(10,5,r_idx_nlevels=1) + df.r.R_l0_g0 =df.r.R_l0_g1 + assert_series_equal(df.ix['R_l0_g0'],df.ix['R_l0_g1']) + + df=mkdf(10,5,r_idx_nlevels=2) + df.r.R_l0_g0 =df.r.R_l0_g1 + assert_array_equal(df.ix['R_l0_g0'].values,df.ix['R_l0_g1'].values) + + df=mkdf(10,5,r_idx_nlevels=3) + df.r.R_l0_g0 =df.r.R_l0_g1 + assert_array_equal(df.ix['R_l0_g0'].values,df.ix['R_l0_g1'].values) + + df=mkdf(10,5,r_idx_nlevels=2) + df.r.R_l0_g0.r.R_l1_g0 = df.r.R_l0_g1.r.R_l1_g1 + assert_array_equal(df.ix['R_l0_g0'].values,df.ix['R_l0_g1'].values) + + df=mkdf(10,5,r_idx_nlevels=3) + df.r.R_l0_g0.r.R_l1_g0 = df.r.R_l0_g1.r.R_l1_g1 + assert_array_equal(df.ix['R_l0_g0'].values,df.ix['R_l0_g1'].values) + + # test setting cols + df=mkdf(10,5,r_idx_nlevels=1) + df.c.C_l0_g0 = df.c.C_l0_g1 + assert_series_equal(df['C_l0_g0'],df['C_l0_g1']) + + df=mkdf(10,5,r_idx_nlevels=2,c_idx_nlevels=2) + df.c.C_l0_g0 = df.c.C_l0_g1 + assert_array_equal(df['C_l0_g0'].values,df['C_l0_g1'].values) + + df=mkdf(10,5,r_idx_nlevels=2,c_idx_nlevels=2) + df.c.C_l0_g0.c.C_l1_g0 = df.c.C_l0_g1.c.C_l1_g1 + assert_array_equal(df['C_l0_g0'].values,df['C_l0_g1'].values) + + df=mkdf(10,5,r_idx_nlevels=2,c_idx_nlevels=3) + df.c.C_l0_g0.c.C_l1_g0 = df.c.C_l0_g1.c.C_l1_g1 + assert_array_equal(df['C_l0_g0'].values,df['C_l0_g1'].values) + + def test_nested_exception(self): # Ignore the 
strange way of triggering the problem # (which may get fixed), it's just a way to trigger diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 74b41f4ef1cd7..8bcb52b1ab9a5 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1000,6 +1000,22 @@ def test_basic_setitem_with_labels(self): self.assertRaises(Exception, s.__setitem__, inds_notfound, 0) self.assertRaises(Exception, s.__setitem__, arr_inds_notfound, 0) + def test_attrib_data_access(self): + s= Series(range(3),index=['a','b','c']) + self.assertEqual(len(dir(s.r)), 3) + self.assertEqual(s.r.b, 1) + + s= Series(range(3),index=['a','b','print']) + self.assertEqual(len(dir(s.r)), 2) + self.assertEqual(s.r.b, 1) + self.assertTrue('print' not in dir(s.r)) + + # test setting + s= Series(range(3),index=['a','b','c']) + s.r.a = s.r.b + self.assertEqual(s.ix['a'],s.ix['b']) + + def test_ix_getitem(self): inds = self.series.index[[3, 4, 7]] assert_series_equal(self.series.ix[inds], self.series.reindex(inds))