Skip to content

ENH: automatic rpy2 instance conversion #7385

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 55 additions & 19 deletions pandas/rpy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,38 +10,69 @@

import pandas as pd
import pandas.core.common as com
import pandas.util.testing as _test

from rpy2.robjects.packages import importr
from rpy2.robjects import r
import rpy2.robjects as robj

import itertools as IT


__all__ = ['convert_robj', 'load_data', 'convert_to_r_dataframe',
'convert_to_r_matrix']
'convert_to_r_matrix', 'r']


def _assign(attr, obj):
    """
    Bind ``obj`` to the name ``attr`` by calling ``robj.r.assign``.

    A pandas DataFrame or Series is first passed through
    ``convert_to_r_dataframe``; any other object is forwarded unchanged.
    Returns whatever ``robj.r.assign`` returns.
    """
    is_pandas_container = isinstance(obj, (pd.DataFrame, pd.Series))
    value = convert_to_r_dataframe(obj) if is_pandas_container else obj
    return robj.r.assign(attr, value)


# robjects.R cannot be subclassed because
# it has a special creation process using rinterface
class _RPandas(object):
    """
    Proxy object that forwards to ``robj.r``.

    Attribute access is delegated to ``robj.r``, except for ``assign``,
    which resolves to the module-level ``_assign`` helper.  Item lookup
    attempts to run the fetched value through ``convert_robj``.
    """

    def __getattribute__(self, attr):
        # Only 'assign' is intercepted; every other name is looked up
        # on robj.r.
        return _assign if attr == 'assign' else getattr(robj.r, attr)

    def __getitem__(self, item):
        r_value = robj.r[item]
        try:
            return convert_robj(r_value)
        except TypeError:
            # convert_robj raised TypeError: hand back the unconverted
            # object instead.
            return r_value

    def __str__(self):
        return str(robj.r)

    def __call__(self, string):
        # The result of the call is returned as-is (no convert_robj here).
        return robj.r(string)


# Module-level _RPandas instance bound to the name ``r``.
r = _RPandas()


def load_data(name, package=None, convert=True):
if package:
importr(package)

r.data(name)
robj.r.data(name)

robj = r[name]
r_obj = robj.r[name]

if convert:
return convert_robj(robj)
return convert_robj(r_obj)
else:
return robj
return r_obj


def _rclass(obj):
"""
Return R class name for input object
"""
return r['class'](obj)[0]
return robj.r['class'](obj)[0]


def _is_null(obj):
Expand All @@ -54,12 +85,12 @@ def _convert_list(obj):
"""
try:
values = [convert_robj(x) for x in obj]
keys = r['names'](obj)
keys = robj.r['names'](obj)
return dict(zip(keys, values))
except TypeError:
# For state.division and state.region
factors = list(r['factor'](obj))
level = list(r['levels'](obj))
factors = list(robj.r['factor'](obj))
level = list(robj.r['levels'](obj))
result = [level[index-1] for index in factors]
return result

Expand All @@ -77,9 +108,9 @@ def _list(item):
# For iris3, HairEyeColor, UCBAdmissions, Titanic
dim = list(obj.dim)
values = np.array(list(obj))
names = r['dimnames'](obj)
names = robj.r['dimnames'](obj)
try:
columns = list(r['names'](names))[::-1]
columns = list(robj.r['names'](names))[::-1]
except TypeError:
columns = ['X{:d}'.format(i) for i in range(len(names))][::-1]
columns.append('value')
Expand All @@ -98,18 +129,18 @@ def _convert_vector(obj):
# Check if the vector has extra information attached to it that can be used
# as an index
try:
attributes = set(r['attributes'](obj).names)
attributes = set(robj.r['attributes'](obj).names)
except AttributeError:
return list(obj)
if 'names' in attributes:
return pd.Series(list(obj), index=r['names'](obj))
return pd.Series(list(obj), index=robj.r['names'](obj))
elif 'tsp' in attributes:
return pd.Series(list(obj), index=r['time'](obj))
return pd.Series(list(obj), index=robj.r['time'](obj))
elif 'labels' in attributes:
return pd.Series(list(obj), index=r['labels'](obj))
return pd.Series(list(obj), index=robj.r['labels'](obj))
if _rclass(obj) == 'dist':
# For 'eurodist'. WARNING: This results in a DataFrame, not a Series or list.
matrix = r['as.matrix'](obj)
matrix = robj.r['as.matrix'](obj)
return convert_robj(matrix)
else:
return list(obj)
Expand Down Expand Up @@ -167,7 +198,7 @@ def _convert_Matrix(mat):
rows = mat.rownames

columns = None if _is_null(columns) else list(columns)
index = r['time'](mat) if _is_null(rows) else list(rows)
index = robj.r['time'](mat) if _is_null(rows) else list(rows)
return pd.DataFrame(np.array(mat), index=_check_int(index),
columns=columns)

Expand Down Expand Up @@ -310,6 +341,10 @@ def convert_to_r_dataframe(df, strings_as_factors=False):

columns = rlc.OrdDict()

if isinstance(df, pd.Series):
name = df.name or 'X0'
df = pd.DataFrame(df, columns=[name])

# FIXME: This doesn't handle MultiIndex

for column in df:
Expand Down Expand Up @@ -365,5 +400,6 @@ def convert_to_r_matrix(df, strings_as_factors=False):

return r_matrix


if __name__ == '__main__':
pass
41 changes: 41 additions & 0 deletions pandas/rpy/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,25 @@ def test_convert_r_dataframe(self):
else:
assert original == converted

def test_convert_r_dataframe_series(self):
    """
    Check ``com.convert_to_r_dataframe`` on Series input: the row names
    match the Series index, the single column is named after the Series
    ('X0' when the Series has no name), and the column values match.
    """
    unnamed = tm.makeFloatSeries()
    named = tm.makeFloatSeries()
    named.name = 'Test'

    cases = [(unnamed, 'X0'), (named, named.name)]
    for series, key in cases:
        r_dataframe = com.convert_to_r_dataframe(series)

        row_labels = com.convert_robj(r_dataframe.rownames)
        assert np.array_equal(row_labels, series.index)

        column_labels = com.convert_robj(r_dataframe.colnames)
        assert np.array_equal(column_labels, np.array([key]))

        column_values = np.array(com.convert_robj(r_dataframe.rx2(key)))
        assert np.array_equal(column_values, series.values)

def test_convert_r_matrix(self):

is_na = robj.baseenv.get("is.na")
Expand Down Expand Up @@ -207,6 +226,28 @@ def test_factor(self):
result = com.load_data(name)
assert np.equal(result, factors)

def test_assign_revert(self):
    """
    Round-trip pandas objects through ``com.r.assign`` and ``com.r[...]``
    and compare what comes back with what was sent.
    """
    frame = tm.makeDataFrame()
    com.r.assign('df', frame)
    # test R function call
    com.r('head(df)')
    tm.assert_frame_equal(frame, com.r['df'])

    time_frame = tm.makeTimeDataFrame()
    com.r.assign('df', time_frame)
    round_tripped = com.r['df']
    # Rebuild the index as a DatetimeIndex before comparing.
    round_tripped.index = pd.DatetimeIndex(round_tripped.index)
    tm.assert_frame_equal(time_frame, round_tripped)

    series = tm.makeFloatSeries()
    series.name = 'Test'
    com.r.assign('s', series)
    fetched = com.r['s']
    # The Series is compared against a one-column frame named 'Test'.
    as_frame = pd.DataFrame(series, columns=['Test'])
    tm.assert_frame_equal(as_frame, fetched)


if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
# '--with-coverage', '--cover-package=pandas.core'],
Expand Down