diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 401a7746953cb..26877752017e3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5680,49 +5680,71 @@ def _arrays_to_mgr(arrays, arr_names, index, columns, dtype=None): return create_block_manager_from_arrays(arrays, arr_names, axes) def extract_index(data): + # Slightly misleading name. + # Indexes are only extracted for elements in the iterable + # `data` inheriting from Series. from pandas.core.index import _union_indexes index = None if len(data) == 0: index = Index([]) - elif len(data) > 0 and index is None: + elif len(data) > 0: raw_lengths = [] indexes = [] have_raw_arrays = False have_series = False - have_dicts = False + have_mappings = False + # Loop over the element, such as vectors `v` corresponding + # to columns in the DataFrame for v in data: if isinstance(v, Series): have_series = True indexes.append(v.index) - elif isinstance(v, dict): - have_dicts = True - indexes.append(v.keys()) - elif isinstance(v, (list, tuple, np.ndarray)): - have_raw_arrays = True - raw_lengths.append(len(v)) + else: + # When an OrderedDict, the mapping aspect + # is given priority, although there is a warning when + # mixture of sequences and mapping. The unit tests + # show that this is the desired behaviour. + # Also, shouldn't a `bytes` object be considered a scalar ? + is_mapping = isinstance(v, collections.Mapping) + is_sequence = (isinstance(v, collections.Sequence) or \ + _is_sequence(v)) and not isinstance(v, basestring) + if is_mapping: + have_mappings = True + indexes.append(v.keys()) + elif is_sequence: + # This is a sequence-but-not-a-string + # Although strings have a __len__, + # they will be considered scalar. + have_raw_arrays = True + raw_lengths.append(len(v)) + else: + # Item v is silently ignored, + # as it is not anything an index can be inferred + # from. + pass if not indexes and not raw_lengths: - raise ValueError('If using all scalar values, you must must pass' + raise ValueError('If using all scalar values, you must pass' ' an index') - if have_series or have_dicts: + if have_series or have_mappings: index = _union_indexes(indexes) if have_raw_arrays: lengths = list(set(raw_lengths)) if len(lengths) > 1: - raise ValueError('arrays must all be same length') + raise ValueError('Arrays must all be same length') - if have_dicts: + if have_mappings: raise ValueError('Mixing dicts with non-Series may lead to ' 'ambiguous ordering.') if have_series: if lengths[0] != len(index): - msg = ('array length %d does not match index length %d' + msg = ('Array length %d does not match index length %d' % (lengths[0], len(index))) raise ValueError(msg) else: diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index a9df56a498f63..7853af0964cfd 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2236,7 +2236,7 @@ def testit(): def testit(): DataFrame({'a': False, 'b': True}) - assertRaisesRegexp(ValueError, 'If using all scalar values, you must must pass an index', testit) + assertRaisesRegexp(ValueError, 'If using all scalar values, you must pass an index', testit) def test_insert_error_msmgs(self): @@ -2774,6 +2774,17 @@ def test_constructor_from_items(self): # pass some columns recons = DataFrame.from_items(items, columns=['C', 'B', 'A']) assert_frame_equal(recons, self.frame.ix[:, ['C', 'B', 'A']]) + # not any column either a dict, a list, a tuple, or a numpy.ndarray + import array + recons_ar = DataFrame.from_items([('A', array.array('i', range(10)))]) + recons_rg = DataFrame.from_items([('A', range(10))]) + recons_np = DataFrame.from_items([('A', np.array(range(10)))]) + self.assertEquals(tuple(recons_ar['A']), + tuple(recons_rg['A'])) + self.assertEquals(tuple(recons_ar['A']), + tuple(recons_np['A'])) + + # orient='index'