Skip to content

Commit 6a16303

Browse files
committed
ENH: automatic rpy2 instance conversion
1 parent 3f24b87 commit 6a16303

File tree

2 files changed

+96
-19
lines changed

2 files changed

+96
-19
lines changed

pandas/rpy/common.py

+55 -19
Original file line number | Diff line number | Diff line change
@@ -10,38 +10,69 @@
1010

1111
import pandas as pd
1212
import pandas.core.common as com
13-
import pandas.util.testing as _test
1413

1514
from rpy2.robjects.packages import importr
16-
from rpy2.robjects import r
1715
import rpy2.robjects as robj
1816

1917
import itertools as IT
2018

2119

2220
__all__ = ['convert_robj', 'load_data', 'convert_to_r_dataframe',
23-
'convert_to_r_matrix']
21+
'convert_to_r_matrix', 'r']
22+
23+
24+
def _assign(attr, obj):
    """
    Assign ``obj`` to the name ``attr`` in R.

    pandas DataFrame and Series inputs are first converted with
    ``convert_to_r_dataframe``; any other object is handed to R's
    ``assign`` unchanged. Returns whatever ``robj.r.assign`` returns.
    """
    is_pandas = isinstance(obj, (pd.DataFrame, pd.Series))
    r_value = convert_to_r_dataframe(obj) if is_pandas else obj
    return robj.r.assign(attr, r_value)
28+
29+
30+
# Unable to subclass robjects.R because
31+
# it has special creation process using rinterface
32+
class _RPandas(object):
    """
    Thin stand-in for ``robj.r`` that adds pandas conversion.

    Every attribute lookup is forwarded to ``robj.r`` except ``assign``,
    which is replaced by the module-level ``_assign`` helper. Item access
    attempts to convert the fetched R object with ``convert_robj``.
    """

    def __getattribute__(self, attr):
        # All attribute access is intercepted here; only 'assign' is
        # overridden, everything else is delegated to the real R instance.
        return _assign if attr == 'assign' else getattr(robj.r, attr)

    def __getitem__(self, item):
        """Fetch ``item`` from R, converted to pandas when possible."""
        r_value = robj.r[item]
        try:
            return convert_robj(r_value)
        except TypeError:
            # Conversion raised TypeError: hand back the raw R object.
            return r_value

    def __str__(self):
        return str(robj.r)

    def __call__(self, string):
        """Evaluate ``string`` with ``robj.r`` and return its result as-is."""
        return robj.r(string)
52+
53+
54+
r = _RPandas()
2455

2556

2657
def load_data(name, package=None, convert=True):
    """
    Load the R dataset called ``name``.

    If ``package`` is given it is imported with ``importr`` first. The
    dataset is then loaded via R's ``data`` and looked up by name. When
    ``convert`` is true the result of ``convert_robj`` is returned,
    otherwise the raw R object.
    """
    if package:
        importr(package)

    robj.r.data(name)
    dataset = robj.r[name]

    return convert_robj(dataset) if convert else dataset
3869

3970

4071
def _rclass(obj):
    """
    Return R class name for input object
    """
    # Look up R's ``class`` function, then take the first element of its
    # result for ``obj``.
    r_class_of = robj.r['class']
    return r_class_of(obj)[0]
4576

4677

4778
def _is_null(obj):
@@ -54,12 +85,12 @@ def _convert_list(obj):
5485
"""
5586
try:
5687
values = [convert_robj(x) for x in obj]
57-
keys = r['names'](obj)
88+
keys = robj.r['names'](obj)
5889
return dict(zip(keys, values))
5990
except TypeError:
6091
# For state.division and state.region
61-
factors = list(r['factor'](obj))
62-
level = list(r['levels'](obj))
92+
factors = list(robj.r['factor'](obj))
93+
level = list(robj.r['levels'](obj))
6394
result = [level[index-1] for index in factors]
6495
return result
6596

@@ -77,9 +108,9 @@ def _list(item):
77108
# For iris3, HairEyeColor, UCBAdmissions, Titanic
78109
dim = list(obj.dim)
79110
values = np.array(list(obj))
80-
names = r['dimnames'](obj)
111+
names = robj.r['dimnames'](obj)
81112
try:
82-
columns = list(r['names'](names))[::-1]
113+
columns = list(robj.r['names'](names))[::-1]
83114
except TypeError:
84115
columns = ['X{:d}'.format(i) for i in range(len(names))][::-1]
85116
columns.append('value')
@@ -98,18 +129,18 @@ def _convert_vector(obj):
98129
# Check if the vector has extra information attached to it that can be used
99130
# as an index
100131
try:
101-
attributes = set(r['attributes'](obj).names)
132+
attributes = set(robj.r['attributes'](obj).names)
102133
except AttributeError:
103134
return list(obj)
104135
if 'names' in attributes:
105-
return pd.Series(list(obj), index=r['names'](obj))
136+
return pd.Series(list(obj), index=robj.r['names'](obj))
106137
elif 'tsp' in attributes:
107-
return pd.Series(list(obj), index=r['time'](obj))
138+
return pd.Series(list(obj), index=robj.r['time'](obj))
108139
elif 'labels' in attributes:
109-
return pd.Series(list(obj), index=r['labels'](obj))
140+
return pd.Series(list(obj), index=robj.r['labels'](obj))
110141
if _rclass(obj) == 'dist':
111142
# For 'eurodist'. WARNING: This results in a DataFrame, not a Series or list.
112-
matrix = r['as.matrix'](obj)
143+
matrix = robj.r['as.matrix'](obj)
113144
return convert_robj(matrix)
114145
else:
115146
return list(obj)
@@ -167,7 +198,7 @@ def _convert_Matrix(mat):
167198
rows = mat.rownames
168199

169200
columns = None if _is_null(columns) else list(columns)
170-
index = r['time'](mat) if _is_null(rows) else list(rows)
201+
index = robj.r['time'](mat) if _is_null(rows) else list(rows)
171202
return pd.DataFrame(np.array(mat), index=_check_int(index),
172203
columns=columns)
173204

@@ -310,6 +341,10 @@ def convert_to_r_dataframe(df, strings_as_factors=False):
310341

311342
columns = rlc.OrdDict()
312343

344+
if isinstance(df, pd.Series):
345+
name = df.name or 'X0'
346+
df = pd.DataFrame(df, columns=[name])
347+
313348
# FIXME: This doesn't handle MultiIndex
314349

315350
for column in df:
@@ -365,5 +400,6 @@ def convert_to_r_matrix(df, strings_as_factors=False):
365400

366401
return r_matrix
367402

403+
368404
if __name__ == '__main__':
369405
pass

pandas/rpy/tests/test_common.py

+41
Original file line number | Diff line number | Diff line change
@@ -92,6 +92,25 @@ def test_convert_r_dataframe(self):
9292
else:
9393
assert original == converted
9494

95+
def test_convert_r_dataframe_series(self):
    """Check ``convert_to_r_dataframe`` on Series with and without a name."""
    unnamed = tm.makeFloatSeries()
    named = tm.makeFloatSeries()
    named.name = 'Test'

    # Each case pairs a Series with the column name expected in R.
    cases = [(unnamed, 'X0'), (named, named.name)]

    for series, key in cases:
        r_dataframe = com.convert_to_r_dataframe(series)

        row_labels = com.convert_robj(r_dataframe.rownames)
        assert np.array_equal(row_labels, series.index)

        col_labels = com.convert_robj(r_dataframe.colnames)
        assert np.array_equal(col_labels, np.array([key]))

        column = np.array(com.convert_robj(r_dataframe.rx2(key)))
        assert np.array_equal(column, series.values)
113+
95114
def test_convert_r_matrix(self):
96115

97116
is_na = robj.baseenv.get("is.na")
@@ -207,6 +226,28 @@ def test_factor(self):
207226
result = com.load_data(name)
208227
assert np.equal(result, factors)
209228

229+
def test_assign_revert(self):
    """Assign pandas objects through ``com.r`` and read them back."""
    frame = tm.makeDataFrame()
    com.r.assign('df', frame)
    # test R function call
    com.r('head(df)')
    round_tripped = com.r['df']
    tm.assert_frame_equal(frame, round_tripped)

    ts_frame = tm.makeTimeDataFrame()
    com.r.assign('df', ts_frame)
    round_tripped = com.r['df']
    # Rebuild a DatetimeIndex from the returned index before comparing.
    round_tripped.index = pd.DatetimeIndex(round_tripped.index)
    tm.assert_frame_equal(ts_frame, round_tripped)

    series = tm.makeFloatSeries()
    series.name = 'Test'
    com.r.assign('s', series)
    round_tripped = com.r['s']
    # The Series is expected back as a one-column frame named after it.
    expected = pd.DataFrame(series, columns=['Test'])
    tm.assert_frame_equal(expected, round_tripped)
249+
250+
210251
if __name__ == '__main__':
211252
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
212253
# '--with-coverage', '--cover-package=pandas.core'],

0 commit comments

Comments
 (0)