Skip to content

Commit fcb78dc

Browse files
committed
PERF: nested dict DataFrame construction
1 parent 9e7dc17 commit fcb78dc

File tree

4 files changed

+32
-23
lines changed

4 files changed

+32
-23
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1032,6 +1032,7 @@ Performance Improvements
10321032
- Improved performance of ``to_datetime`` when specified format string is ISO8601 (:issue:`10178`)
10331033
- 2x improvement of ``Series.value_counts`` for float dtype (:issue:`10821`)
10341034
- Enable ``infer_datetime_format`` in ``to_datetime`` when date components do not have 0 padding (:issue:`11142`)
1035+
- Fixed performance regression from 0.16.1 in constructing ``DataFrame`` from a nested dictionary (:issue:`11084`)
10351036

10361037
.. _whatsnew_0170.bug_fixes:
10371038

pandas/core/frame.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@
5252

5353
from pandas.tseries.period import PeriodIndex
5454
from pandas.tseries.index import DatetimeIndex
55+
from pandas.tseries.tdi import TimedeltaIndex
56+
5557

5658
import pandas.core.algorithms as algos
5759
import pandas.core.base as base
@@ -5400,8 +5402,13 @@ def _homogenize(data, index, dtype=None):
54005402
v = v.reindex(index, copy=False)
54015403
else:
54025404
if isinstance(v, dict):
5403-
v = _dict_compat(v)
5404-
oindex = index.astype('O')
5405+
if oindex is None:
5406+
oindex = index.astype('O')
5407+
5408+
if isinstance(index, (DatetimeIndex, TimedeltaIndex)):
5409+
v = _dict_compat(v)
5410+
else:
5411+
v = dict(v)
54055412
v = lib.fast_multiget(v, oindex.values, default=NA)
54065413
v = _sanitize_array(v, index, dtype=dtype, copy=False,
54075414
raise_cast_failure=False)

pandas/tests/test_frame.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from pandas import (DataFrame, Index, Series, Panel, notnull, isnull,
3535
MultiIndex, DatetimeIndex, Timestamp, date_range,
3636
read_csv, timedelta_range, Timedelta, CategoricalIndex,
37-
option_context)
37+
option_context, period_range)
3838
from pandas.core.dtypes import DatetimeTZDtype
3939
import pandas as pd
4040
from pandas.parser import CParserError
@@ -3061,6 +3061,27 @@ def create_data(constructor):
30613061
assert_frame_equal(result_timedelta, expected)
30623062
assert_frame_equal(result_Timedelta, expected)
30633063

3064+
def test_nested_dict_frame_constructor(self):
3065+
rng = period_range('1/1/2000', periods=5)
3066+
df = DataFrame(randn(10, 5), columns=rng)
3067+
3068+
data = {}
3069+
for col in df.columns:
3070+
for row in df.index:
3071+
data.setdefault(col, {})[row] = df.get_value(row, col)
3072+
3073+
result = DataFrame(data, columns=rng)
3074+
tm.assert_frame_equal(result, df)
3075+
3076+
data = {}
3077+
for col in df.columns:
3078+
for row in df.index:
3079+
data.setdefault(row, {})[col] = df.get_value(row, col)
3080+
3081+
result = DataFrame(data, index=rng).T
3082+
tm.assert_frame_equal(result, df)
3083+
3084+
30643085
def _check_basic_constructor(self, empty):
30653086
"mat: 2d matrix with shape (3, 2) to input. empty - makes sized objects"
30663087
mat = empty((2, 3), dtype=float)

pandas/tseries/tests/test_period.py

-20
Original file line numberDiff line numberDiff line change
@@ -2075,26 +2075,6 @@ def test_period_set_index_reindex(self):
20752075
df = df.set_index(idx2)
20762076
self.assertTrue(df.index.equals(idx2))
20772077

2078-
def test_nested_dict_frame_constructor(self):
2079-
rng = period_range('1/1/2000', periods=5)
2080-
df = DataFrame(randn(10, 5), columns=rng)
2081-
2082-
data = {}
2083-
for col in df.columns:
2084-
for row in df.index:
2085-
data.setdefault(col, {})[row] = df.get_value(row, col)
2086-
2087-
result = DataFrame(data, columns=rng)
2088-
tm.assert_frame_equal(result, df)
2089-
2090-
data = {}
2091-
for col in df.columns:
2092-
for row in df.index:
2093-
data.setdefault(row, {})[col] = df.get_value(row, col)
2094-
2095-
result = DataFrame(data, index=rng).T
2096-
tm.assert_frame_equal(result, df)
2097-
20982078
def test_frame_to_time_stamp(self):
20992079
K = 5
21002080
index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')

0 commit comments

Comments
 (0)