From 6a1630337716f44e2f8957623ff91327e6c91b16 Mon Sep 17 00:00:00 2001
From: sinhrks
Date: Sat, 7 Jun 2014 19:13:03 +0900
Subject: [PATCH] ENH: automatic rpy2 instance conversion

---
 pandas/rpy/common.py            | 82 +++++++++++++++++++++++++--------
 pandas/rpy/tests/test_common.py | 41 +++++++++++++++++
 2 files changed, 104 insertions(+), 19 deletions(-)

diff --git a/pandas/rpy/common.py b/pandas/rpy/common.py
index 55adad3610816..a0ccc72f7ab1c 100644
--- a/pandas/rpy/common.py
+++ b/pandas/rpy/common.py
@@ -10,38 +10,76 @@
 
 import pandas as pd
 import pandas.core.common as com
-import pandas.util.testing as _test
 
 from rpy2.robjects.packages import importr
-from rpy2.robjects import r
 import rpy2.robjects as robj
 
 import itertools as IT
 
 
 __all__ = ['convert_robj', 'load_data', 'convert_to_r_dataframe',
-           'convert_to_r_matrix']
+           'convert_to_r_matrix', 'r']
+
+
+def _assign(attr, obj):
+    """
+    Assign obj to the R variable attr, converting pandas objects first
+    """
+    if isinstance(obj, (pd.DataFrame, pd.Series)):
+        obj = convert_to_r_dataframe(obj)
+    return robj.r.assign(attr, obj)
+
+
+# Unable to subclass robjects.R because
+# it has special creation process using rinterface
+class _RPandas(object):
+    """
+    Proxy for robj.r that converts pandas objects on assign and lookup
+    """
+
+    def __getattribute__(self, attr):
+        if attr == 'assign':
+            return _assign
+        return getattr(robj.r, attr)
+
+    def __getitem__(self, item):
+        result = robj.r[item]
+        try:
+            result = convert_robj(result)
+        except TypeError:
+            # convert_robj could not handle it; return the object as-is
+            pass
+        return result
+
+    def __str__(self):
+        return str(robj.r)
+
+    def __call__(self, string):
+        return robj.r(string)
+
+
+r = _RPandas()
 
 
 def load_data(name, package=None, convert=True):
     if package:
         importr(package)
 
-    r.data(name)
+    robj.r.data(name)
 
-    robj = r[name]
+    r_obj = robj.r[name]
 
     if convert:
-        return convert_robj(robj)
+        return convert_robj(r_obj)
     else:
-        return robj
+        return r_obj
 
 
 def _rclass(obj):
     """
     Return R class name for input object
     """
-    return r['class'](obj)[0]
+    return robj.r['class'](obj)[0]
 
 
 def _is_null(obj):
@@ -54,12 +92,12 @@ def _convert_list(obj):
     """
     try:
         values = [convert_robj(x) for x in obj]
-        keys = r['names'](obj)
+        keys = robj.r['names'](obj)
         return dict(zip(keys, values))
     except TypeError:
         # For state.division and state.region
-        factors = list(r['factor'](obj))
-        level = list(r['levels'](obj))
+        factors = list(robj.r['factor'](obj))
+        level = list(robj.r['levels'](obj))
         result = [level[index-1] for index in factors]
         return result
 
@@ -77,9 +115,9 @@ def _list(item):
     # For iris3, HairEyeColor, UCBAdmissions, Titanic
     dim = list(obj.dim)
     values = np.array(list(obj))
-    names = r['dimnames'](obj)
+    names = robj.r['dimnames'](obj)
     try:
-        columns = list(r['names'](names))[::-1]
+        columns = list(robj.r['names'](names))[::-1]
     except TypeError:
         columns = ['X{:d}'.format(i) for i in range(len(names))][::-1]
     columns.append('value')
@@ -98,18 +136,18 @@ def _convert_vector(obj):
     # Check if the vector has extra information attached to it that can be used
     # as an index
     try:
-        attributes = set(r['attributes'](obj).names)
+        attributes = set(robj.r['attributes'](obj).names)
     except AttributeError:
         return list(obj)
     if 'names' in attributes:
-        return pd.Series(list(obj), index=r['names'](obj))
+        return pd.Series(list(obj), index=robj.r['names'](obj))
     elif 'tsp' in attributes:
-        return pd.Series(list(obj), index=r['time'](obj))
+        return pd.Series(list(obj), index=robj.r['time'](obj))
     elif 'labels' in attributes:
-        return pd.Series(list(obj), index=r['labels'](obj))
+        return pd.Series(list(obj), index=robj.r['labels'](obj))
     if _rclass(obj) == 'dist':
         # For 'eurodist'. WARNING: This results in a DataFrame, not a Series or list.
-        matrix = r['as.matrix'](obj)
+        matrix = robj.r['as.matrix'](obj)
         return convert_robj(matrix)
     else:
         return list(obj)
@@ -167,7 +205,7 @@ def _convert_Matrix(mat):
     rows = mat.rownames
 
     columns = None if _is_null(columns) else list(columns)
-    index = r['time'](mat) if _is_null(rows) else list(rows)
+    index = robj.r['time'](mat) if _is_null(rows) else list(rows)
 
     return pd.DataFrame(np.array(mat), index=_check_int(index),
                         columns=columns)
@@ -310,6 +348,11 @@ def convert_to_r_dataframe(df, strings_as_factors=False):
 
     columns = rlc.OrdDict()
 
+    if isinstance(df, pd.Series):
+        # only None means unnamed; keep falsy-but-valid names such as 0
+        name = 'X0' if df.name is None else df.name
+        df = pd.DataFrame(df, columns=[name])
+
     # FIXME: This doesn't handle MultiIndex
 
     for column in df:
@@ -365,5 +408,6 @@ def convert_to_r_matrix(df, strings_as_factors=False):
 
     return r_matrix
 
+
 if __name__ == '__main__':
     pass
diff --git a/pandas/rpy/tests/test_common.py b/pandas/rpy/tests/test_common.py
index a2e6d08d07b58..b0f20fed477af 100644
--- a/pandas/rpy/tests/test_common.py
+++ b/pandas/rpy/tests/test_common.py
@@ -92,6 +92,25 @@ def test_convert_r_dataframe(self):
                 else:
                     assert original == converted
 
+    def test_convert_r_dataframe_series(self):
+
+        s_noname = tm.makeFloatSeries()
+        s_name = tm.makeFloatSeries()
+        s_name.name = 'Test'
+
+        for series, key in [(s_noname, 'X0'), (s_name, s_name.name)]:
+
+            r_dataframe = com.convert_to_r_dataframe(series)
+
+            assert np.array_equal(
+                com.convert_robj(r_dataframe.rownames), series.index)
+            assert np.array_equal(
+                com.convert_robj(r_dataframe.colnames), np.array([key]))
+
+            result = com.convert_robj(r_dataframe.rx2(key))
+            result = np.array(result)
+            assert np.array_equal(result, series.values)
+
     def test_convert_r_matrix(self):
 
         is_na = robj.baseenv.get("is.na")
@@ -207,6 +226,28 @@ def test_factor(self):
             result = com.load_data(name)
             assert np.equal(result, factors)
 
+    def test_assign_revert(self):
+        df = tm.makeDataFrame()
+        com.r.assign('df', df)
+        # test R function call
+        com.r('head(df)')
+        result = com.r['df']
+        tm.assert_frame_equal(df, result)
+
+        df = tm.makeTimeDataFrame()
+        com.r.assign('df', df)
+        result = com.r['df']
+        result.index = pd.DatetimeIndex(result.index)
+        tm.assert_frame_equal(df, result)
+
+        s = tm.makeFloatSeries()
+        s.name = 'Test'
+        com.r.assign('s', s)
+        result = com.r['s']
+        expected = pd.DataFrame(s, columns=['Test'])
+        tm.assert_frame_equal(expected, result)
+
+
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    # '--with-coverage', '--cover-package=pandas.core'],