Skip to content

Commit bc40729

Browse files
lbeltrame authored and wesm committed
BUG: Use NA types for better conversion, and refuse matrix conversions
of mixed-type DataFrames, fix #1282
1 parent dc96e7a commit bc40729

File tree

1 file changed, with 55 additions and 5 deletions.

pandas/rpy/common.py

Lines changed: 55 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -181,7 +181,18 @@ def convert_robj(obj, use_pandas=True):
181181
np.int32: robj.IntVector,
182182
np.int64: robj.IntVector,
183183
np.object_: robj.StrVector,
184-
np.str: robj.StrVector}
184+
np.str: robj.StrVector,
185+
np.bool: robj.BoolVector}
186+
187+
NA_TYPES = {np.float64: robj.NA_Real,
188+
np.float32: robj.NA_Real,
189+
np.float: robj.NA_Real,
190+
np.int: robj.NA_Integer,
191+
np.int32: robj.NA_Integer,
192+
np.int64: robj.NA_Integer,
193+
np.object_: robj.NA_Character,
194+
np.str: robj.NA_Character,
195+
np.bool: robj.NA_Logical}
185196

186197
def convert_to_r_dataframe(df, strings_as_factors=False):
187198
"""
@@ -207,8 +218,9 @@ def convert_to_r_dataframe(df, strings_as_factors=False):
207218
for column in df:
208219
value = df[column]
209220
value_type = value.dtype.type
210-
value = [item if pn.notnull(item) else robj.NA_Real
221+
value = [item if pn.notnull(item) else NA_TYPES[value_type]
211222
for item in value]
223+
212224
value = VECTOR_TYPES[value_type](value)
213225

214226
if not strings_as_factors:
@@ -242,6 +254,11 @@ def convert_to_r_matrix(df, strings_as_factors=False):
242254
243255
"""
244256

257+
if df._is_mixed_type:
258+
raise TypeError("Conversion to matrix only possible with non-mixed "
259+
"type DataFrames")
260+
261+
245262
r_dataframe = convert_to_r_dataframe(df, strings_as_factors)
246263
as_matrix = robj.baseenv.get("as.matrix")
247264
r_matrix = as_matrix(r_dataframe)
@@ -291,34 +308,67 @@ def test_convert_matrix():
291308

292309
def test_convert_r_dataframe():
293310

311+
is_na = robj.baseenv.get("is.na")
312+
294313
seriesd = _test.getSeriesData()
295314
frame = pn.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])
296315

316+
#Null data
317+
frame["E"] = [np.nan for item in frame["A"]]
318+
# Some mixed type data
319+
frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]
320+
297321
r_dataframe = convert_to_r_dataframe(frame)
298322

299323
assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
300324
assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
325+
assert all(is_na(item) for item in r_dataframe.rx2("E"))
301326

302-
for column in r_dataframe.colnames:
327+
for column in frame[["A", "B", "C", "D"]]:
303328
coldata = r_dataframe.rx2(column)
304329
original_data = frame[column]
305330
assert np.array_equal(convert_robj(coldata), original_data)
306331

332+
for column in frame[["D", "E"]]:
333+
for original, converted in zip(frame[column],
334+
r_dataframe.rx2(column)):
335+
336+
if pn.isnull(original):
337+
assert is_na(converted)
338+
else:
339+
assert original == converted
340+
307341
def test_convert_r_matrix():
308342

343+
is_na = robj.baseenv.get("is.na")
344+
309345
seriesd = _test.getSeriesData()
310346
frame = pn.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])
347+
#Null data
348+
frame["E"] = [np.nan for item in frame["A"]]
311349

312350
r_dataframe = convert_to_r_matrix(frame)
313351

314352
assert np.array_equal(convert_robj(r_dataframe.rownames), frame.index)
315353
assert np.array_equal(convert_robj(r_dataframe.colnames), frame.columns)
354+
assert all(is_na(item) for item in r_dataframe.rx(True, "E"))
316355

317-
for column in r_dataframe.colnames:
356+
for column in frame[["A", "B", "C", "D"]]:
318357
coldata = r_dataframe.rx(True, column)
319358
original_data = frame[column]
320-
assert np.array_equal(convert_robj(coldata), original_data)
359+
assert np.array_equal(convert_robj(coldata),
360+
original_data)
361+
362+
# Pandas bug 1282
363+
frame["F"] = ["text" if item % 2 == 0 else np.nan for item in range(30)]
321364

365+
#FIXME: Ugly, this whole module needs to be ported to nose/unittest
366+
try:
367+
wrong_matrix = convert_to_r_matrix(frame)
368+
except TypeError:
369+
pass
370+
except Exception:
371+
raise
322372

323373

324374
if __name__ == '__main__':

0 commit comments

Comments
 (0)