From ade95fc1cb904269f64acfac0d2bdfcec9a4ee55 Mon Sep 17 00:00:00 2001 From: Chang She Date: Mon, 19 Nov 2012 14:18:05 -0500 Subject: [PATCH 1/3] BUG: dtype=object should stop conversion from object in frame constructor #2255 --- pandas/core/frame.py | 28 +++++++++++++++------------- pandas/tests/test_frame.py | 6 ++++++ 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 14b435e0aafc8..07bf38eb51464 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -402,7 +402,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, index = _get_names_from_index(data) if isinstance(data[0], (list, tuple, dict, Series)): - arrays, columns = _to_arrays(data, columns) + arrays, columns = _to_arrays(data, columns, dtype) columns = _ensure_index(columns) @@ -5159,7 +5159,7 @@ def _rec_to_dict(arr): return columns, sdict -def _to_arrays(data, columns, coerce_float=False): +def _to_arrays(data, columns, dtype=None, coerce_float=False): """ Return list of arrays, columns """ @@ -5167,30 +5167,31 @@ def _to_arrays(data, columns, coerce_float=False): if len(data) == 0: return [], columns if columns is not None else [] if isinstance(data[0], (list, tuple)): - return _list_to_arrays(data, columns, coerce_float=coerce_float) + return _list_to_arrays(data, columns, dtype=dtype, coerce_float=coerce_float) elif isinstance(data[0], dict): - return _list_of_dict_to_arrays(data, columns, + return _list_of_dict_to_arrays(data, columns, dtype=dtype, coerce_float=coerce_float) elif isinstance(data[0], Series): - return _list_of_series_to_arrays(data, columns, + return _list_of_series_to_arrays(data, columns, dtype=dtype, coerce_float=coerce_float) else: # last ditch effort data = map(tuple, data) - return _list_to_arrays(data, columns, coerce_float=coerce_float) + return _list_to_arrays(data, columns, dtype=dtype, + coerce_float=coerce_float) -def _list_to_arrays(data, columns, coerce_float=False): +def _list_to_arrays(data, columns, dtype=None, coerce_float=False): if len(data) > 0 and isinstance(data[0], tuple): content = list(lib.to_object_array_tuples(data).T) else: # list of lists content = list(lib.to_object_array(data).T) - return _convert_object_array(content, columns, + return _convert_object_array(content, columns, dtype=dtype, coerce_float=coerce_float) -def _list_of_series_to_arrays(data, columns, coerce_float=False): +def _list_of_series_to_arrays(data, columns, dtype=None, coerce_float=False): from pandas.core.index import _get_combined_index if columns is None: @@ -5211,13 +5212,13 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False): if values.dtype == np.object_: content = list(values.T) - return _convert_object_array(content, columns, + return _convert_object_array(content, columns, dtype=dtype, coerce_float=coerce_float) else: return values.T, columns -def _list_of_dict_to_arrays(data, columns, coerce_float=False): +def _list_of_dict_to_arrays(data, columns, dtype=None, coerce_float=False): if columns is None: gen = (x.keys() for x in data) columns = lib.fast_unique_multiple_list_gen(gen) @@ -5228,11 +5229,11 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False): for d in data] content = list(lib.dicts_to_array(data, list(columns)).T) - return _convert_object_array(content, columns, + return _convert_object_array(content, columns, dtype=dtype, coerce_float=coerce_float) -def _convert_object_array(content, columns, coerce_float=False): +def _convert_object_array(content, columns, dtype=None, coerce_float=False): if columns is None: columns = _default_index(len(content)) else: @@ -5241,6 +5242,7 @@ def _convert_object_array(content, columns, coerce_float=False): 'columns' % (len(columns), len(content))) arrays = [lib.maybe_convert_objects(arr, try_float=coerce_float) + if dtype != object and dtype != np.object else arr for arr in content] return arrays, columns diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 042c744ef167a..ce9bfc2af1198 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1730,6 +1730,12 @@ def test_constructor_dtype_nocast_view(self): should_be_view[0][0] = 97 self.assertEqual(df.values[0, 0], 97) + def test_constructor_dtype_list_data(self): + df = DataFrame([[1, '2'], + [None, 'a']], dtype=object) + self.assert_(df.ix[1, 0] is None) + self.assert_(df.ix[0, 1] == '2') + def test_constructor_rec(self): rec = self.frame.to_records(index=False) From f2941c23166e03759986833ae1d3722b61328ce1 Mon Sep 17 00:00:00 2001 From: Chang She Date: Mon, 19 Nov 2012 15:49:51 -0500 Subject: [PATCH 2/3] CLN: put new kwd at the end --- pandas/core/frame.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 07bf38eb51464..614ff48b04fa7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -402,7 +402,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, index = _get_names_from_index(data) if isinstance(data[0], (list, tuple, dict, Series)): - arrays, columns = _to_arrays(data, columns, dtype) + arrays, columns = _to_arrays(data, columns, dtype=dtype) columns = _ensure_index(columns) @@ -5159,7 +5159,7 @@ def _rec_to_dict(arr): return columns, sdict -def _to_arrays(data, columns, dtype=None, coerce_float=False): +def _to_arrays(data, columns, coerce_float=False, dtype=None): """ Return list of arrays, columns """ @@ -5167,21 +5167,25 @@ def _to_arrays(data, columns, dtype=None, coerce_float=False): if len(data) == 0: return [], columns if columns is not None else [] if isinstance(data[0], (list, tuple)): - return _list_to_arrays(data, columns, dtype=dtype, coerce_float=coerce_float) + return _list_to_arrays(data, columns, coerce_float=coerce_float, + dtype=dtype) elif isinstance(data[0], dict): - return _list_of_dict_to_arrays(data, columns, dtype=dtype, - coerce_float=coerce_float) + return _list_of_dict_to_arrays(data, columns, + coerce_float=coerce_float, + dtype=dtype) elif isinstance(data[0], Series): - return _list_of_series_to_arrays(data, columns, dtype=dtype, - coerce_float=coerce_float) + return _list_of_series_to_arrays(data, columns, + coerce_float=coerce_float, + dtype=dtype) else: # last ditch effort data = map(tuple, data) - return _list_to_arrays(data, columns, dtype=dtype, - coerce_float=coerce_float) + return _list_to_arrays(data, columns, + coerce_float=coerce_float, + dtype=dtype) -def _list_to_arrays(data, columns, dtype=None, coerce_float=False): +def _list_to_arrays(data, columns, coerce_float=False, dtype=None): if len(data) > 0 and isinstance(data[0], tuple): content = list(lib.to_object_array_tuples(data).T) else: @@ -5191,7 +5195,7 @@ def _list_to_arrays(data, columns, dtype=None, coerce_float=False): coerce_float=coerce_float) -def _list_of_series_to_arrays(data, columns, dtype=None, coerce_float=False): +def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): from pandas.core.index import _get_combined_index if columns is None: @@ -5218,7 +5222,7 @@ def _list_of_series_to_arrays(data, columns, dtype=None, coerce_float=False): return values.T, columns -def _list_of_dict_to_arrays(data, columns, dtype=None, coerce_float=False): +def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): if columns is None: gen = (x.keys() for x in data) columns = lib.fast_unique_multiple_list_gen(gen) @@ -5233,7 +5237,7 @@ def _list_of_dict_to_arrays(data, columns, dtype=None, coerce_float=False): coerce_float=coerce_float) -def _convert_object_array(content, columns, dtype=None, coerce_float=False): +def _convert_object_array(content, columns, coerce_float=False, dtype=None): if columns is None: columns = _default_index(len(content)) else: From b5ea3870663de67a30e0271e8dcb23ff4dba7832 Mon Sep 17 00:00:00 2001 From: Chang She Date: Tue, 20 Nov 2012 16:14:20 -0500 Subject: [PATCH 3/3] Get rid of newline. Triggering Travis --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 614ff48b04fa7..04f72de80d500 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -403,7 +403,6 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, if isinstance(data[0], (list, tuple, dict, Series)): arrays, columns = _to_arrays(data, columns, dtype=dtype) - columns = _ensure_index(columns) if index is None: