Skip to content

Commit eecc018

Browse files
lbeltrame authored and wesm committed
ENH: Add support for converting DataFrames to R data.frames and
matrices, close #350
1 parent 4ea4cfc commit eecc018

File tree

1 file changed

+108
-1
lines changed

1 file changed

+108
-1
lines changed

pandas/rpy/common.py

+108-1
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,8 @@
1212
from rpy2.robjects import r
1313
import rpy2.robjects as robj
1414

15-
__all__ = ['convert_robj', 'load_data']
15+
__all__ = ['convert_robj', 'load_data', 'convert_to_r_dataframe',
16+
'convert_to_r_matrix']
1617

1718
def load_data(name, package=None, convert=True):
1819
if package:
@@ -173,6 +174,81 @@ def convert_robj(obj, use_pandas=True):
173174

174175
raise Exception('Do not know what to do with %s object' % type(obj))
175176

177+
# Maps the scalar type of a pandas column (``Series.dtype.type``) to the rpy2
# vector constructor used to build the matching R vector.
#
# The builtin ``float``/``int``/``str`` keys replace the former ``np.float``/
# ``np.int``/``np.str`` aliases: those were plain aliases of the builtins
# (so the dict contents are unchanged on old NumPy) and have been removed
# from recent NumPy releases, where referencing them raises AttributeError
# at import time.
VECTOR_TYPES = {
    np.float64: robj.FloatVector,
    np.float32: robj.FloatVector,
    float: robj.FloatVector,
    int: robj.IntVector,
    np.int32: robj.IntVector,
    np.int64: robj.IntVector,
    np.object_: robj.StrVector,
    str: robj.StrVector,
    # Boolean columns map onto R logical vectors instead of raising KeyError.
    np.bool_: robj.BoolVector,
}
185+
186+
def convert_to_r_dataframe(df, strings_as_factors=False):
    """
    Convert a pandas DataFrame to a R data.frame.

    Each column is converted to the rpy2 vector type registered for its
    dtype in ``VECTOR_TYPES``; null entries become the R missing value
    that matches that vector type. The DataFrame index becomes the R row
    names (as strings).

    Parameters
    ----------
    df: The DataFrame being converted
    strings_as_factors: Whether to turn strings into R factors (default: False)

    Returns
    -------
    A R data.frame

    Raises
    ------
    KeyError
        If a column's dtype has no entry in ``VECTOR_TYPES``.
    """
    import rpy2.rlike.container as rlc

    # R has a distinct NA per vector type; use the one that matches the
    # vector being built rather than the logical NA for everything.
    # Vector types not listed here fall back to the logical NA.
    na_by_vector = {robj.FloatVector: robj.NA_Real,
                    robj.IntVector: robj.NA_Integer,
                    robj.StrVector: robj.NA_Character}

    # R's I() wraps a vector "as is"; it is applied to every column unless
    # strings_as_factors is requested. Look it up once, not once per column.
    as_is = None if strings_as_factors else robj.baseenv.get("I")

    columns = rlc.OrdDict()

    # FIXME: This doesn't handle MultiIndex
    for column in df:
        series = df[column]
        make_vector = VECTOR_TYPES[series.dtype.type]
        na_value = na_by_vector.get(make_vector, robj.NA_Logical)

        r_vector = make_vector([item if pn.notnull(item) else na_value
                                for item in series])

        if as_is is not None:
            r_vector = as_is(r_vector)

        columns[column] = r_vector

    r_dataframe = robj.DataFrame(columns)
    r_dataframe.rownames = robj.StrVector(df.index)

    return r_dataframe
227+
228+
229+
def convert_to_r_matrix(df, strings_as_factors=False):
    """
    Convert a pandas DataFrame to a R matrix.

    The frame is first turned into a R data.frame with
    ``convert_to_r_dataframe`` and the result is passed to R's
    ``as.matrix``.

    Parameters
    ----------
    df: The DataFrame being converted
    strings_as_factors: Whether to turn strings into R factors (default: False)

    Returns
    -------
    A R matrix
    """
    intermediate = convert_to_r_dataframe(df, strings_as_factors)
    to_matrix = robj.baseenv.get("as.matrix")
    return to_matrix(intermediate)
250+
251+
176252
def test_convert_list():
177253
obj = r('list(a=1, b=2, c=3)')
178254

@@ -213,6 +289,37 @@ def test_convert_matrix():
213289
assert np.array_equal(converted.index, ['a', 'b', 'c'])
214290
assert np.array_equal(converted.columns, ['one', 'two', 'three'])
215291

292+
def test_convert_r_dataframe():
    """Round-trip check: DataFrame -> R data.frame keeps labels and data."""
    frame = pn.DataFrame(_test.getSeriesData(), columns=['D', 'C', 'B', 'A'])

    converted = convert_to_r_dataframe(frame)

    # Row and column labels must survive the conversion.
    row_labels = convert_robj(converted.rownames)
    assert np.array_equal(row_labels, frame.index)
    col_labels = convert_robj(converted.colnames)
    assert np.array_equal(col_labels, frame.columns)

    # Each R column, converted back, must equal the pandas column.
    for name in converted.colnames:
        r_column = converted.rx2(name)
        assert np.array_equal(convert_robj(r_column), frame[name])
306+
307+
def test_convert_r_matrix():
    """Round-trip check: DataFrame -> R matrix keeps labels and data."""
    seriesd = _test.getSeriesData()
    frame = pn.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])

    r_matrix = convert_to_r_matrix(frame)

    assert np.array_equal(convert_robj(r_matrix.rownames), frame.index)
    assert np.array_equal(convert_robj(r_matrix.colnames), frame.columns)

    for column in r_matrix.colnames:
        # A matrix column is selected with R's ``m[, column]``, i.e.
        # ``rx(True, column)``; ``rx2`` (R's ``[[``) does not address
        # matrix columns by their dimnames.
        coldata = r_matrix.rx(True, column)
        original_data = frame[column]
        assert np.array_equal(convert_robj(coldata), original_data)
321+
322+
216323

217324
# Running this module as a script does nothing; the test_* functions above
# are not invoked from here.
if __name__ == '__main__':
    pass

0 commit comments

Comments
 (0)