diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 977dc9e2b56ff..9305fa18e23e2 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -392,15 +392,6 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
             mgr = self._init_mgr(data, index, columns, dtype=dtype, copy=copy)
         elif isinstance(data, dict):
             mgr = self._init_dict(data, index, columns, dtype=dtype)
-        elif isinstance(data, ma.MaskedArray):
-            mask = ma.getmaskarray(data)
-            if mask.any():
-                data, fill_value = _maybe_upcast(data, copy=True)
-                data[mask] = fill_value
-            else:
-                data = data.copy()
-            mgr = self._init_ndarray(data, index, columns, dtype=dtype,
-                                     copy=copy)
         elif isinstance(data, np.ndarray):
             if data.dtype.names:
                 data_columns, data = _rec_to_dict(data)
@@ -408,8 +399,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
                     columns = data_columns
                 mgr = self._init_dict(data, index, columns, dtype=dtype)
             else:
-                mgr = self._init_ndarray(data, index, columns, dtype=dtype,
-                                         copy=copy)
+                mgr = self._init_ndarray(_unmask(data), index, columns,
+                                         dtype=dtype, copy=copy)
         elif isinstance(data, list):
             if len(data) > 0:
                 if index is None and isinstance(data[0], Series):
@@ -5424,10 +5415,29 @@ def convert(v):
     return values
 
 
+def _unmask(arr):
+    """Return a copy of a masked array with masked entries filled in.
+
+    When any entry is masked the array is upcast through ``_maybe_upcast``,
+    which also supplies the fill value written into the masked positions.
+    Arguments that are not masked arrays are returned unchanged (no copy).
+    """
+    if isinstance(arr, ma.MaskedArray):
+        mask = ma.getmaskarray(arr)
+        if mask.any():
+            # copy=True requests a fresh array from _maybe_upcast, so the
+            # in-place fill is expected not to touch the caller's data and
+            # no second copy is needed (matches the removed __init__ branch).
+            arr, fill_value = _maybe_upcast(arr, copy=True)
+            arr[mask] = fill_value
+            return arr
+        return arr.copy()
+    return arr
+
+
 def _rec_to_dict(arr):
     if isinstance(arr, np.ndarray):
         columns = list(arr.dtype.names)
-        sdict = dict((k, arr[k]) for k in columns)
+        sdict = dict((k, _unmask(arr[k])) for k in columns)
     elif isinstance(arr, DataFrame):
         columns = list(arr.columns)
         sdict = dict((k, v.values) for k, v in arr.iteritems())
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 7bafed216b9b9..63e3a59d09c35 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -3,6 +3,8 @@
 from datetime import datetime, timedelta, time
 from StringIO import StringIO
 import cPickle as pickle
+import functools
+import itertools
 import operator
 import os
 import unittest
@@ -13,6 +15,7 @@
 from numpy.random import randn
 import numpy as np
 import numpy.ma as ma
+import numpy.ma.mrecords as mrecords
 from numpy.testing import assert_array_equal
 
 import pandas as pan
@@ -2491,6 +2494,38 @@ def test_constructor_maskedarray_nonfloat(self):
         self.assertEqual(True, frame['A'][1])
         self.assertEqual(False, frame['C'][2])
 
+    def test_constructor_mrecarray(self):
+        """Ensure mrecarray produces frame identical to dict of masked arrays
+        """
+        assert_fr_equal = functools.partial(assert_frame_equal,
+                                            check_index_type=True,
+                                            check_column_type=True,
+                                            check_frame_type=True)
+        arrays = [
+            ('float', np.array([1.5, 2.0])),
+            ('int', np.array([1, 2])),
+            ('str', np.array(['abc', 'def'])),
+        ]
+        for name, arr in arrays[:]:
+            arrays.append(('masked1_' + name,
+                           np.ma.masked_array(arr, mask=[False, True])))
+        arrays.append(('masked_all', np.ma.masked_all((2,))))
+        arrays.append(('masked_none',
+                       np.ma.masked_array([1.0, 2.5], mask=False)))
+
+        # call assert_frame_equal for all selections of 3 arrays
+        for comb in itertools.combinations(arrays, 3):
+            names, data = zip(*comb)
+            mrecs = mrecords.fromarrays(data, names=names)
+            assert_fr_equal(DataFrame(mrecs),
+                            DataFrame(dict(comb), columns=names))
+            # specify columns
+            assert_fr_equal(DataFrame(mrecs, columns=names[::-1]),
+                            DataFrame(dict(comb), columns=names[::-1]))
+            # specify index
+            assert_fr_equal(DataFrame(mrecs, index=[1, 2]),
+                            DataFrame(dict(comb), columns=names, index=[1, 2]))
+
     def test_constructor_corner(self):
         df = DataFrame(index=[])
         self.assertEqual(df.values.shape, (0, 0))