Skip to content

Commit d765439

Browse files
committed
BUG: from_dict ignored order of OrderedDict (#8425)
1 parent 2d2606d commit d765439

File tree

6 files changed

+42
-7
lines changed

6 files changed

+42
-7
lines changed

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -633,6 +633,7 @@ Indexing
633633
- Bug in which :meth:`DataFrame.to_csv` caused a segfault for a reindexed data frame, when the indices were single-level :class:`MultiIndex` (:issue:`26303`).
634634
- Fixed bug where assigning a :class:`arrays.PandasArray` to a :class:`pandas.core.frame.DataFrame` would raise an error (:issue:`26390`)
635635
- Allow keyword arguments for callable local reference used in the :meth:`DataFrame.query` string (:issue:`26426`)
636+
- Bug in which :meth:`DataFrame.from_dict` ignored the order of an ``OrderedDict`` when ``orient='index'`` (:issue:`8425`).
636637

637638

638639
Missing

pandas/_libs/lib.pyx

+3-2
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,8 @@ def fast_unique_multiple(list arrays, sort: bool=True):
240240

241241
@cython.wraparound(False)
242242
@cython.boundscheck(False)
243-
def fast_unique_multiple_list(lists: list, sort: bool=True) -> list:
243+
def fast_unique_multiple_list(lists: list, sort: bool=True,
244+
ordered: bool=False) -> list:
244245
cdef:
245246
list buf
246247
Py_ssize_t k = len(lists)
@@ -257,7 +258,7 @@ def fast_unique_multiple_list(lists: list, sort: bool=True) -> list:
257258
if val not in table:
258259
table[val] = stub
259260
uniques.append(val)
260-
if sort:
261+
if sort and not ordered:
261262
try:
262263
uniques.sort()
263264
except Exception:

pandas/core/indexes/api.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def _get_combined_index(indexes, intersect=False, sort=False):
125125
return index
126126

127127

128-
def _union_indexes(indexes, sort=True):
128+
def _union_indexes(indexes, sort=True, ordered=False):
129129
"""
130130
Return the union of indexes.
131131
@@ -136,6 +136,8 @@ def _union_indexes(indexes, sort=True):
136136
indexes : list of Index or list objects
137137
sort : bool, default True
138138
Whether the result index should come out sorted or not.
139+
ordered : bool, default False
140+
Whether ordered indexes, such as the keys of an OrderedDict, are passed.
139141
140142
Returns
141143
-------
@@ -171,7 +173,8 @@ def conv(i):
171173
return i
172174

173175
return Index(
174-
lib.fast_unique_multiple_list([conv(i) for i in inds], sort=sort))
176+
lib.fast_unique_multiple_list([conv(i) for i in inds],
177+
sort=sort, ordered=ordered))
175178

176179
if kind == 'special':
177180
result = indexes[0]

pandas/core/internals/construction.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -285,13 +285,16 @@ def extract_index(data):
285285
have_raw_arrays = False
286286
have_series = False
287287
have_dicts = False
288+
have_ordered = False
288289

289290
for val in data:
290291
if isinstance(val, ABCSeries):
291292
have_series = True
292293
indexes.append(val.index)
293294
elif isinstance(val, dict):
294295
have_dicts = True
296+
if isinstance(val, OrderedDict):
297+
have_ordered = True
295298
indexes.append(list(val.keys()))
296299
elif is_list_like(val) and getattr(val, 'ndim', 1) == 1:
297300
have_raw_arrays = True
@@ -302,7 +305,7 @@ def extract_index(data):
302305
' an index')
303306

304307
if have_series or have_dicts:
305-
index = _union_indexes(indexes)
308+
index = _union_indexes(indexes, ordered=have_ordered)
306309

307310
if have_raw_arrays:
308311
lengths = list(set(raw_lengths))

pandas/tests/frame/test_constructors.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -1153,7 +1153,7 @@ def test_constructor_list_of_series(self):
11531153

11541154
sdict = OrderedDict(zip(['x', 'Unnamed 0'], data))
11551155
expected = DataFrame.from_dict(sdict, orient='index')
1156-
tm.assert_frame_equal(result.sort_index(), expected)
1156+
tm.assert_frame_equal(result, expected)
11571157

11581158
# none named
11591159
data = [OrderedDict([['a', 1.5], ['b', 3], ['c', 4], ['d', 6]]),
@@ -1288,7 +1288,7 @@ def test_constructor_list_of_namedtuples(self):
12881288
def test_constructor_orient(self):
12891289
data_dict = self.mixed_frame.T._series
12901290
recons = DataFrame.from_dict(data_dict, orient='index')
1291-
expected = self.mixed_frame.sort_index()
1291+
expected = self.mixed_frame.reindex(recons.index)
12921292
tm.assert_frame_equal(recons, expected)
12931293

12941294
# dict of sequence
@@ -1298,6 +1298,16 @@ def test_constructor_orient(self):
12981298
xp = DataFrame.from_dict(a).T.reindex(list(a.keys()))
12991299
tm.assert_frame_equal(rs, xp)
13001300

1301+
def test_constructor_from_ordered_dict(self):
1302+
# GH8425
1303+
a = OrderedDict([
1304+
('one', OrderedDict([('col_a', 'foo1'), ('col_b', 'bar1')])),
1305+
('two', OrderedDict([('col_a', 'foo2'), ('col_b', 'bar2')])),
1306+
('three', OrderedDict([('col_a', 'foo3'), ('col_b', 'bar3')]))])
1307+
expected = DataFrame.from_dict(a, orient='columns').T
1308+
result = DataFrame.from_dict(a, orient='index')
1309+
tm.assert_frame_equal(result, expected)
1310+
13011311
def test_from_dict_columns_parameter(self):
13021312
# GH 18529
13031313
# Test new columns parameter for from_dict that was added to make

pandas/tests/test_lib.py

+17
Original file line numberDiff line numberDiff line change
@@ -203,3 +203,20 @@ def test_get_reverse_indexer(self):
203203
def test_cache_readonly_preserve_docstrings():
204204
# GH18197
205205
assert Index.hasnans.__doc__ is not None
206+
207+
208+
def test_fast_unique_multiple_list_with_sort():
209+
keys = [['p', 'a'], ['n', 'd'], ['a', 's']]
210+
211+
expected = np.array(['a', 'd', 'n', 'p', 's'])
212+
result = lib.fast_unique_multiple_list(keys, sort=True)
213+
tm.assert_numpy_array_equal(np.array(result), expected)
214+
215+
216+
def test_fast_unique_multiple_list_with_ordered():
217+
# GH8425
218+
keys = [['p', 'a'], ['n', 'd'], ['a', 's']]
219+
220+
expected = np.array(['p', 'a', 'n', 'd', 's'])
221+
result = lib.fast_unique_multiple_list(keys, sort=True, ordered=True)
222+
tm.assert_numpy_array_equal(np.array(result), expected)

0 commit comments

Comments
 (0)