Skip to content

Access row/col data via attributes #3045

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion doc/source/v0.11.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -274,13 +274,15 @@ Enhancements
- added option `display.chop_threshold` to control display of small numerical
values. (GH2739_)

- added attribute-style access to rows and columns, with tab-completion
in IPython: use ``x.r.<row label>`` for rows and ``x.c.<col label>`` for columns. Originally GH1904_.

- added option `display.max_info_rows` to prevent verbose_info from being
calculated for frames above 1M rows (configurable). (GH2807_, GH2918_)

- value_counts() now accepts a "normalize" argument, for normalized
histograms. (GH2710_).


Bug Fixes
~~~~~~~~~

Expand Down Expand Up @@ -327,6 +329,7 @@ on GitHub for a complete list.
.. _GH2898: https://github.com/pydata/pandas/issues/2898
.. _GH2978: https://github.com/pydata/pandas/issues/2978
.. _GH2739: https://github.com/pydata/pandas/issues/2739
.. _GH1904: https://github.com/pydata/pandas/issues/1904
.. _GH2710: https://github.com/pydata/pandas/issues/2710
.. _GH2806: https://github.com/pydata/pandas/issues/2806
.. _GH2807: https://github.com/pydata/pandas/issues/2807
Expand Down
86 changes: 86 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1206,6 +1206,92 @@ def _long_prod(vals):
return result


class PropertyLookupHelper(object):
    """Attribute-style access (and tab-completion) for labelled data objects.

    Reading ``helper.<label>`` calls `get_func`, assigning to
    ``helper.<label>`` calls `set_func`, and ``dir(helper)`` lists the labels
    returned by `list_func` that can be typed as attribute names.

    Parameters
    ----------
    parent : object
        Object whose data is exposed. The default accessors use
        ``parent.axes[axis]`` for listing and ``parent.ix[...]`` for
        reading and writing.
    list_func : callable, optional
        ``f(parent, limit, axis) -> list`` of keys, no longer than `limit`
        when `limit` is an int.
    get_func : callable, optional
        ``f(parent, key, axis) -> data`` associated with `key` on `axis`.
    set_func : callable, optional
        ``f(parent, key, val, axis)`` storing `val` under `key` on `axis`.
    axis : int, optional
        Axis handed to the accessors. Mandatory whenever at least one of
        the default accessors is used.
    limit : int, optional
        Upper bound on the number of labels handed to ``dir()``.

    Raises
    ------
    AssertionError
        If `axis` is None while a default list/get/set function is needed.

    Examples
    --------
    Use from a data class::

        class Series(object):
            ...
            @property
            def r(self):
                return com.PropertyLookupHelper(self, axis=0)

    >>> s = Series(range(3), index=['a', 'b', 'c'])
    >>> dir(s.r)
    ['a', 'b', 'c']
    >>> s.r.a
    0
    """

    def __init__(self, parent, list_func=None, get_func=None,
                 set_func=None, axis=None, limit=None):

        def default_list(parent, limit, axis):
            labels = parent.axes[axis]
            try:
                # an index exposing `levels` (MultiIndex): only offer the
                # outermost level, inner levels are reached by chaining
                labels = labels.levels[0]
            except AttributeError:  # no `levels`
                pass

            if limit:
                labels = labels[:limit]

            return list(labels)

        def make_indexer(key, axis):
            # pad with full slices so that `key` addresses the requested
            # axis; a one-element tuple collapses to the bare key
            indexer = tuple([slice(None)] * axis + [key])
            return indexer[0] if len(indexer) == 1 else indexer

        def default_get(parent, key, axis):
            return parent.ix[make_indexer(key, axis)]

        def default_set(parent, key, value, axis):
            # Unwrap objects carrying `.values` so that, with a row
            # MultiIndex, `df.r.a = df.r.b` assigns by position instead of
            # aligning on labels. Only a missing attribute is tolerated;
            # other errors are no longer hidden.
            try:
                value = value.values
            except AttributeError:
                pass

            parent.ix[make_indexer(key, axis)] = value

        needs_default = (list_func is None or get_func is None or
                         set_func is None)
        if axis is None and needs_default:
            raise AssertionError("When using default list/get/set, you must specify axis")

        # __setattr__ is overridden to write into the parent, so the
        # helper's own state has to be stored through `object`.
        store = object.__setattr__
        store(self, 'axis', axis)
        store(self, 'parent', parent)
        store(self, 'list_func', list_func or default_list)
        store(self, 'get_func', get_func or default_get)
        store(self, 'set_func', set_func or default_set)
        store(self, 'limit', limit)

    def __getattr__(self, k):
        # Only called when normal lookup fails, i.e. for data labels.
        # Internal state is fetched via `object` so that a half-initialised
        # instance raises AttributeError instead of recursing.
        fetch = object.__getattribute__
        get_func = fetch(self, 'get_func')
        parent = fetch(self, 'parent')
        axis = fetch(self, 'axis')
        try:
            return get_func(parent, k, axis)
        except KeyError:
            # Must look like an AttributeError for hasattr()/getattr(...,
            # default) to work, while still being a KeyError for callers
            # that already catch that.
            raise _MissingLabelError(k)

    def __setattr__(self, k, val):
        fetch = object.__getattribute__
        set_func = fetch(self, 'set_func')
        set_func(fetch(self, 'parent'), k, val, fetch(self, 'axis'))

    def __dir__(self):
        import keyword
        import re

        try:
            string_types = basestring  # Python 2
        except NameError:
            string_types = str  # Python 3

        ascii_identifier = re.compile(r'[A-Za-z_][A-Za-z0-9_]*\Z')

        def is_identifier(label):
            # str.isidentifier exists on Python 3 only; fall back to an
            # explicit pattern elsewhere. `\Z` rejects a trailing newline.
            check = getattr(label, 'isidentifier', None)
            if check is not None:
                return check()
            return ascii_identifier.match(label) is not None

        fetch = object.__getattribute__
        labels = fetch(self, 'list_func')(fetch(self, 'parent'),
                                          fetch(self, 'limit'),
                                          fetch(self, 'axis'))
        return [x for x in labels
                if isinstance(x, string_types) and
                is_identifier(x) and
                not keyword.iskeyword(x) and
                not x == 'print']  # 'print' is not a keyword on PY3


class _MissingLabelError(AttributeError, KeyError):
    """Raised by PropertyLookupHelper when a label does not exist.

    Derives from both exception types so attribute protocols (hasattr,
    getattr with a default) and code catching KeyError keep working.
    """


class groupby(dict):
"""
A simple groupby different from the one in itertools.
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1893,6 +1893,14 @@ def set_value(self, index, col, value):

return result.set_value(index, col, value)

@property
def r(self):
    """Attribute-style accessor for rows (``frame.r.<row label>``)."""
    # axis 0; at most 1000000 labels are handed to the helper's listing
    row_helper = com.PropertyLookupHelper(self, limit=1000000, axis=0)
    return row_helper

@property
def c(self):
    """Attribute-style accessor for columns (``frame.c.<col label>``)."""
    # axis 1; at most 1000000 labels are handed to the helper's listing
    col_helper = com.PropertyLookupHelper(self, limit=1000000, axis=1)
    return col_helper

def irow(self, i, copy=False):
return self._ixs(i,axis=0)

Expand Down
4 changes: 4 additions & 0 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,10 @@ def ix(self):

return self._ix

@property
def r(self):
    """Attribute-style accessor for the series' labels (``s.r.<label>``)."""
    # axis 0; at most 1000000 labels are handed to the helper's listing
    label_helper = com.PropertyLookupHelper(self, limit=1000000, axis=0)
    return label_helper

def _xs(self, key, axis=0, level=None, copy=True):
return self.__getitem__(key)

Expand Down
70 changes: 69 additions & 1 deletion pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@

from pandas.util.testing import (assert_almost_equal,
assert_series_equal,
assert_frame_equal)
assert_frame_equal,
makeCustomDataframe as mkdf)
from pandas.util import py3compat
from pandas.util.compat import OrderedDict

Expand Down Expand Up @@ -1557,6 +1558,73 @@ def test_iget_value(self):
expected = self.frame.get_value(row, col)
assert_almost_equal(result, expected)

def test_attrib_data_access(self):
    """Exercise attribute access through ``DataFrame.r`` and ``DataFrame.c``."""
    # plain lookup and listing on a frame with single-level axes
    df = mkdf(10, 5)
    self.assertEqual(len(dir(df.r)), 10)
    row = df.r.R_l0_g4
    assert_series_equal(row, df.irow(4))

    col = df.c.C_l0_g2
    self.assertEqual(len(dir(df.c)), 5)
    assert_series_equal(col, df.icol(2))

    # exclude keywords and illegal identifiers
    data = [[1, 2, 3], [3, 4, 5], [6, 7, 8], [9, 10, 11]]
    df = DataFrame(data,
                   index=['a', 'print', 'a,b', 'foo bar'],
                   columns=['yield', 'b', 'c,d'])
    self.assertEqual(len(dir(df.r)), 1)
    self.assertEqual(len(dir(df.c)), 1)
    for hidden in ('print', 'a,b'):
        self.assertTrue(hidden not in dir(df.r))
    for hidden in ('yield', 'c,d'):
        self.assertTrue(hidden not in dir(df.c))

    # recursive access
    df = mkdf(4, 2, c_idx_nlevels=2, r_idx_nlevels=2)
    nested_row = df.r.R_l0_g0.r.R_l1_g0
    assert_array_equal(nested_row.values,
                       df.ix['R_l0_g0', 'R_l1_g0'].values)
    nested_col = df.c.C_l0_g0.c.C_l1_g0
    assert_array_equal(nested_col.values,
                       df.xs(('C_l0_g0', 'C_l1_g0'), 1).values)

    # test setting rows
    df = mkdf(10, 5, r_idx_nlevels=1)
    df.r.R_l0_g0 = df.r.R_l0_g1
    assert_series_equal(df.ix['R_l0_g0'], df.ix['R_l0_g1'])

    for nlevels in (2, 3):
        df = mkdf(10, 5, r_idx_nlevels=nlevels)
        df.r.R_l0_g0 = df.r.R_l0_g1
        assert_array_equal(df.ix['R_l0_g0'].values,
                           df.ix['R_l0_g1'].values)

    for nlevels in (2, 3):
        df = mkdf(10, 5, r_idx_nlevels=nlevels)
        df.r.R_l0_g0.r.R_l1_g0 = df.r.R_l0_g1.r.R_l1_g1
        assert_array_equal(df.ix['R_l0_g0'].values,
                           df.ix['R_l0_g1'].values)

    # test setting cols
    df = mkdf(10, 5, r_idx_nlevels=1)
    df.c.C_l0_g0 = df.c.C_l0_g1
    assert_series_equal(df['C_l0_g0'], df['C_l0_g1'])

    df = mkdf(10, 5, r_idx_nlevels=2, c_idx_nlevels=2)
    df.c.C_l0_g0 = df.c.C_l0_g1
    assert_array_equal(df['C_l0_g0'].values, df['C_l0_g1'].values)

    for nlevels in (2, 3):
        df = mkdf(10, 5, r_idx_nlevels=2, c_idx_nlevels=nlevels)
        df.c.C_l0_g0.c.C_l1_g0 = df.c.C_l0_g1.c.C_l1_g1
        assert_array_equal(df['C_l0_g0'].values, df['C_l0_g1'].values)


def test_nested_exception(self):
# Ignore the strange way of triggering the problem
# (which may get fixed), it's just a way to trigger
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,22 @@ def test_basic_setitem_with_labels(self):
self.assertRaises(Exception, s.__setitem__, inds_notfound, 0)
self.assertRaises(Exception, s.__setitem__, arr_inds_notfound, 0)

def test_attrib_data_access(self):
    """Exercise listing, reading and assigning through ``Series.r``."""
    # every label is a valid identifier: all are listed
    ser = Series(range(3), index=['a', 'b', 'c'])
    self.assertEqual(len(dir(ser.r)), 3)
    self.assertEqual(ser.r.b, 1)

    # 'print' is hidden from the listing, the other labels still work
    ser = Series(range(3), index=['a', 'b', 'print'])
    self.assertEqual(len(dir(ser.r)), 2)
    self.assertEqual(ser.r.b, 1)
    self.assertFalse('print' in dir(ser.r))

    # test setting
    ser = Series(range(3), index=['a', 'b', 'c'])
    replacement = ser.r.b
    ser.r.a = replacement
    self.assertEqual(ser.ix['a'], ser.ix['b'])


def test_ix_getitem(self):
inds = self.series.index[[3, 4, 7]]
assert_series_equal(self.series.ix[inds], self.series.reindex(inds))
Expand Down