|
12 | 12 | from rpy2.robjects import r
|
13 | 13 | import rpy2.robjects as robj
|
14 | 14 |
|
15 |
# Public names exported by a star-import of this module.
__all__ = [
    'convert_robj',
    'load_data',
    'convert_to_r_dataframe',
    'convert_to_r_matrix',
]
16 | 17 |
|
17 | 18 | def load_data(name, package=None, convert=True):
|
18 | 19 | if package:
|
@@ -173,6 +174,81 @@ def convert_robj(obj, use_pandas=True):
|
173 | 174 |
|
174 | 175 | raise Exception('Do not know what to do with %s object' % type(obj))
|
175 | 176 |
|
# Maps the ``dtype.type`` of a pandas column to the rpy2 vector class used to
# build the matching R vector.
#
# The former NumPy aliases ``np.float`` / ``np.int`` / ``np.str`` were plain
# aliases of the Python builtins and have been removed from NumPy, so the
# builtins are referenced directly (same keys on NumPy versions that still
# had the aliases). ``np.str_`` and ``np.bool_`` are included so string- and
# boolean-typed columns do not fail the lookup with a KeyError.
VECTOR_TYPES = {np.float64: robj.FloatVector,
                np.float32: robj.FloatVector,
                float: robj.FloatVector,
                int: robj.IntVector,
                np.int32: robj.IntVector,
                np.int64: robj.IntVector,
                np.object_: robj.StrVector,
                str: robj.StrVector,
                np.str_: robj.StrVector,
                np.bool_: robj.BoolVector}
| 185 | + |
def convert_to_r_dataframe(df, strings_as_factors=False):
    """
    Convert a pandas DataFrame to a R data.frame.

    Each column is turned into the R vector type registered for its dtype in
    VECTOR_TYPES; null entries become the R NA value of that vector type.
    The DataFrame index is used for the R row names.

    Parameters
    ----------
    df: The DataFrame being converted
    strings_as_factors: Whether to turn strings into R factors (default: False)

    Returns
    -------
    A R data.frame

    Raises
    ------
    KeyError: If a column's dtype has no entry in VECTOR_TYPES

    """

    import rpy2.rlike.container as rlc

    # A missing value has to be encoded with the NA constant that belongs to
    # the target R vector type; a logical NA placed in a float or string
    # vector is not the NA of that type. Anything not listed here falls back
    # to the logical NA.
    na_by_vector = {robj.FloatVector: robj.NA_Real,
                    robj.IntVector: robj.NA_Integer,
                    robj.StrVector: robj.NA_Character}

    # R's I() wraps a vector "as is" so that data.frame() leaves it
    # unconverted. It is loop-invariant, so it is looked up once, and only
    # when it is actually needed.
    as_is = None
    if not strings_as_factors:
        as_is = robj.baseenv.get("I")

    columns = rlc.OrdDict()

    #FIXME: This doesn't handle MultiIndex

    for column in df:
        series = df[column]
        # Resolve the vector class first so an unsupported dtype fails
        # before any per-item work is done.
        vector_type = VECTOR_TYPES[series.dtype.type]
        na_value = na_by_vector.get(vector_type, robj.NA_Logical)

        items = [item if pn.notnull(item) else na_value
                 for item in series]
        value = vector_type(items)

        if as_is is not None:
            value = as_is(value)

        columns[column] = value

    r_dataframe = robj.DataFrame(columns)
    r_dataframe.rownames = robj.StrVector(df.index)

    return r_dataframe
| 227 | + |
| 228 | + |
def convert_to_r_matrix(df, strings_as_factors=False):
    """
    Convert a pandas DataFrame to a R matrix.

    The frame is first converted with convert_to_r_dataframe and the result
    is then passed through R's ``as.matrix``.

    Parameters
    ----------
    df: The DataFrame being converted
    strings_as_factors: Whether to turn strings into R factors (default: False)

    Returns
    -------
    A R matrix

    """
    intermediate = convert_to_r_dataframe(df, strings_as_factors)
    return robj.baseenv.get("as.matrix")(intermediate)
| 250 | + |
| 251 | + |
176 | 252 | def test_convert_list():
|
177 | 253 | obj = r('list(a=1, b=2, c=3)')
|
178 | 254 |
|
@@ -213,6 +289,37 @@ def test_convert_matrix():
|
213 | 289 | assert np.array_equal(converted.index, ['a', 'b', 'c'])
|
214 | 290 | assert np.array_equal(converted.columns, ['one', 'two', 'three'])
|
215 | 291 |
|
def test_convert_r_dataframe():
    """Check that a DataFrame converted to an R data.frame keeps its row
    names, column names and per-column data when read back with
    convert_robj."""
    expected = pn.DataFrame(_test.getSeriesData(),
                            columns=['D', 'C', 'B', 'A'])

    converted = convert_to_r_dataframe(expected)

    # Labels survive the conversion.
    assert np.array_equal(convert_robj(converted.rownames), expected.index)
    assert np.array_equal(convert_robj(converted.colnames), expected.columns)

    # Every column holds the same values as the source frame.
    for name in converted.colnames:
        assert np.array_equal(convert_robj(converted.rx2(name)),
                              expected[name])
| 306 | + |
def test_convert_r_matrix():
    """Check that a DataFrame converted to an R matrix keeps its row names,
    column names and per-column data when read back with convert_robj."""

    seriesd = _test.getSeriesData()
    frame = pn.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])

    r_matrix = convert_to_r_matrix(frame)

    assert np.array_equal(convert_robj(r_matrix.rownames), frame.index)
    assert np.array_equal(convert_robj(r_matrix.colnames), frame.columns)

    for column in r_matrix.colnames:
        # A matrix column is selected with two-dimensional indexing
        # (R: m[, column]); list-style `[[` extraction by name does not
        # address a matrix column. True stands in for the missing row index.
        coldata = r_matrix.rx(True, column)
        original_data = frame[column]
        assert np.array_equal(convert_robj(coldata), original_data)
| 321 | + |
| 322 | + |
216 | 323 |
|
# Script entry point: running this module directly currently does nothing.
if __name__ == '__main__':
    pass
|
0 commit comments