Skip to content

Commit a2cd13a

Browse files
committed
BUG: fix handling of NAs in Factors in pandas.rpy, close #1615
1 parent 8db3848 commit a2cd13a

File tree

2 files changed

+25
-12
lines changed

2 files changed

+25
-12
lines changed

RELEASE.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ pandas 0.8.2
3838

3939
**Bug fixes**
4040

41-
- Fix critical DatetimeIndex.union bugs (#1730, #1719, #1745)
41+
- Fix critical DatetimeIndex.union bugs (#1730, #1719, #1745, #1702)
4242
- Fix MM-YYYY time series indexing case (#1672)
4343
- Fix case where Categorical group key was not being passed into index in
4444
GroupBy result (#1701)
@@ -47,6 +47,7 @@ pandas 0.8.2
4747
and 1.7 (#1717)
4848
- Fix performance issue in MultiIndex.format (#1746)
4949
- Fixed GroupBy bugs interacting with DatetimeIndex asof / map methods (#1677)
50+
- Handle factors with NAs in pandas.rpy (#1615)
5051

5152
pandas 0.8.1
5253
============

pandas/rpy/common.py

+23-11
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,8 @@
55

66
import numpy as np
77

8-
import pandas as pn
8+
import pandas as pd
9+
import pandas.core.common as com
910
import pandas.util.testing as _test
1011

1112
from rpy2.robjects.packages import importr
@@ -17,7 +18,7 @@
1718

1819
def load_data(name, package=None, convert=True):
1920
if package:
20-
pack = importr(package)
21+
importr(package)
2122

2223
r.data(name)
2324

@@ -62,9 +63,9 @@ def _convert_array(obj):
6263
if obj.names is not None:
6364
name_list = [list(x) for x in obj.names]
6465
if len(dim) == 2:
65-
return pn.DataFrame(arr, index=name_list[0], columns=name_list[1])
66+
return pd.DataFrame(arr, index=name_list[0], columns=name_list[1])
6667
elif len(dim) == 3:
67-
return pn.Panel(arr, items=name_list[2],
68+
return pd.Panel(arr, items=name_list[2],
6869
major_axis=name_list[0],
6970
minor_axis=name_list[1])
7071
else:
@@ -107,11 +108,22 @@ def _convert_DataFrame(rdf):
107108
values = _convert_vector(vec)
108109

109110
if isinstance(vec, robj.FactorVector):
110-
values = np.asarray(vec.levels).take(values - 1)
111+
levels = np.asarray(vec.levels)
112+
if com.is_float_dtype(values):
113+
mask = np.isnan(values)
114+
notmask = -mask
115+
result = np.empty(len(values), dtype=object)
116+
result[mask] = np.nan
117+
118+
locs = (values[notmask] - 1).astype(np.int_)
119+
result[notmask] = levels.take(locs)
120+
values = result
121+
else:
122+
values = np.asarray(vec.levels).take(values - 1)
111123

112124
data[col] = values
113125

114-
return pn.DataFrame(data, index=_check_int(rows), columns=columns)
126+
return pd.DataFrame(data, index=_check_int(rows), columns=columns)
115127

116128
def _convert_Matrix(mat):
117129
columns = mat.colnames
@@ -120,7 +132,7 @@ def _convert_Matrix(mat):
120132
columns = None if _is_null(columns) else list(columns)
121133
index = None if _is_null(rows) else list(rows)
122134

123-
return pn.DataFrame(np.array(mat), index=_check_int(index),
135+
return pd.DataFrame(np.array(mat), index=_check_int(index),
124136
columns=columns)
125137

126138
def _check_int(vec):
@@ -218,7 +230,7 @@ def convert_to_r_dataframe(df, strings_as_factors=False):
218230
for column in df:
219231
value = df[column]
220232
value_type = value.dtype.type
221-
value = [item if pn.notnull(item) else NA_TYPES[value_type]
233+
value = [item if pd.notnull(item) else NA_TYPES[value_type]
222234
for item in value]
223235

224236
value = VECTOR_TYPES[value_type](value)
@@ -311,7 +323,7 @@ def test_convert_r_dataframe():
311323
is_na = robj.baseenv.get("is.na")
312324

313325
seriesd = _test.getSeriesData()
314-
frame = pn.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])
326+
frame = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])
315327

316328
#Null data
317329
frame["E"] = [np.nan for item in frame["A"]]
@@ -333,7 +345,7 @@ def test_convert_r_dataframe():
333345
for original, converted in zip(frame[column],
334346
r_dataframe.rx2(column)):
335347

336-
if pn.isnull(original):
348+
if pd.isnull(original):
337349
assert is_na(converted)
338350
else:
339351
assert original == converted
@@ -343,7 +355,7 @@ def test_convert_r_matrix():
343355
is_na = robj.baseenv.get("is.na")
344356

345357
seriesd = _test.getSeriesData()
346-
frame = pn.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])
358+
frame = pd.DataFrame(seriesd, columns=['D', 'C', 'B', 'A'])
347359
#Null data
348360
frame["E"] = [np.nan for item in frame["A"]]
349361

0 commit comments

Comments
 (0)