Skip to content

ENH: Add support for converting DataFrames to R data.frames and matrices #1212

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 108 additions & 1 deletion pandas/rpy/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
from rpy2.robjects import r
import rpy2.robjects as robj

# Public names exported by a star-import of this module.
__all__ = ['convert_robj', 'load_data', 'convert_to_r_dataframe',
           'convert_to_r_matrix']

def load_data(name, package=None, convert=True):
if package:
Expand Down Expand Up @@ -173,6 +174,81 @@ def convert_robj(obj, use_pandas=True):

raise Exception('Do not know what to do with %s object' % type(obj))

# Maps a column's numpy scalar type (``series.dtype.type``) to the rpy2
# vector class used to hold that column on the R side.
#
# The builtins ``float``, ``int`` and ``str`` stand in for the old
# ``np.float``, ``np.int`` and ``np.str`` aliases: those were the very same
# objects as the builtins, and the aliases no longer exist in recent NumPy
# releases, so spelling them via ``np.`` breaks the module at import time.
VECTOR_TYPES = {np.float64: robj.FloatVector,
                np.float32: robj.FloatVector,
                float: robj.FloatVector,
                int: robj.IntVector,
                np.int32: robj.IntVector,
                np.int64: robj.IntVector,
                np.object_: robj.StrVector,
                str: robj.StrVector,
                np.str_: robj.StrVector,
                np.bool_: robj.BoolVector}

def convert_to_r_dataframe(df, strings_as_factors=False):
    """
    Convert a pandas DataFrame to a R data.frame.

    Each column is turned into the rpy2 vector class registered for its
    dtype in VECTOR_TYPES; null entries become the R missing value that
    matches that vector class. The DataFrame index becomes the row names.

    Parameters
    ----------
    df: The DataFrame being converted
    strings_as_factors: Whether to turn strings into R factors (default: False)

    Returns
    -------
    A R data.frame

    Raises
    ------
    KeyError: If a column's dtype has no entry in VECTOR_TYPES

    """

    import rpy2.rlike.container as rlc

    # R keeps a distinct missing value per vector mode, so pick the sentinel
    # that belongs to the vector being built instead of always using the
    # logical one. Any other vector class falls back to NA_Logical.
    na_by_vector = {robj.FloatVector: robj.NA_Real,
                    robj.IntVector: robj.NA_Integer,
                    robj.StrVector: robj.NA_Character}

    # R's I() marks a vector "as is". It is looked up once, up front,
    # because the lookup does not depend on the column.
    as_is = None if strings_as_factors else robj.baseenv.get("I")

    columns = rlc.OrdDict()

    #FIXME: This doesn't handle MultiIndex

    for column in df:
        series = df[column]
        vector_type = VECTOR_TYPES[series.dtype.type]
        na_value = na_by_vector.get(vector_type, robj.NA_Logical)

        value = vector_type([item if pn.notnull(item) else na_value
                             for item in series])

        if as_is is not None:
            value = as_is(value)

        columns[column] = value

    r_dataframe = robj.DataFrame(columns)
    r_dataframe.rownames = robj.StrVector(df.index)

    return r_dataframe


def convert_to_r_matrix(df, strings_as_factors=False):
    """
    Build a R matrix from a pandas DataFrame.

    The frame is first converted with convert_to_r_dataframe and the
    resulting R data.frame is then passed through R's ``as.matrix``.

    Parameters
    ----------
    df: The DataFrame being converted
    strings_as_factors: Forwarded unchanged to convert_to_r_dataframe
        (default: False)

    Returns
    -------
    A R matrix

    """

    intermediate = convert_to_r_dataframe(df, strings_as_factors)
    return robj.baseenv.get("as.matrix")(intermediate)


def test_convert_list():
obj = r('list(a=1, b=2, c=3)')

Expand Down Expand Up @@ -213,6 +289,37 @@ def test_convert_matrix():
assert np.array_equal(converted.index, ['a', 'b', 'c'])
assert np.array_equal(converted.columns, ['one', 'two', 'three'])

def test_convert_r_dataframe():
    """Check that a DataFrame survives conversion to a R data.frame.

    Row names, column names and every column's data are converted back with
    convert_robj and compared against the source frame.
    """
    expected = pn.DataFrame(_test.getSeriesData(),
                            columns=['D', 'C', 'B', 'A'])

    converted = convert_to_r_dataframe(expected)

    assert np.array_equal(convert_robj(converted.rownames), expected.index)
    assert np.array_equal(convert_robj(converted.colnames), expected.columns)

    for name in converted.colnames:
        # rx2 extracts a single column of the data.frame by name.
        r_column = converted.rx2(name)
        assert np.array_equal(convert_robj(r_column), expected[name])

def test_convert_r_matrix():
    """Check that a DataFrame survives conversion to a R matrix.

    Row names, column names and every column's data are converted back with
    convert_robj and compared against the source frame.
    """
    expected = pn.DataFrame(_test.getSeriesData(),
                            columns=['D', 'C', 'B', 'A'])

    r_matrix = convert_to_r_matrix(expected)

    assert np.array_equal(convert_robj(r_matrix.rownames), expected.index)
    assert np.array_equal(convert_robj(r_matrix.colnames), expected.columns)

    for name in r_matrix.colnames:
        # rx(True, name) selects all rows of the named column.
        r_column = r_matrix.rx(True, name)
        assert np.array_equal(convert_robj(r_column), expected[name])



# Script entry point: intentionally a no-op, so running this module
# directly does nothing.
if __name__ == '__main__':
    pass