Skip to content

BUG: GH10160 in DataFrame construction from dict with datetime64 index #10269

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions doc/source/whatsnew/v0.16.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -160,4 +160,10 @@ Bug Fixes
- Bug where infer_freq infers timerule (WOM-5XXX) unsupported by to_offset (:issue:`9425`)
- Bug in ``DataFrame.to_hdf()`` where table format would raise a seemingly unrelated error for invalid (non-string) column names. This is now explicitly forbidden. (:issue:`9057`)
- Bug in masking an empty ``DataFrame`` (:issue:`10126`)

- Bug where MySQL interface could not handle numeric table/column names (:issue:`10255`)

- Bug in ``DataFrame`` construction from nested ``dict`` with ``datetime64`` keys (:issue:`10160`)

- Bug in ``Series`` construction from ``dict`` with ``datetime64`` keys (:issue:`9456`)

16 changes: 15 additions & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import pandas.lib as lib
import pandas.tslib as tslib
from pandas import compat
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types, iteritems

from pandas.core.config import get_option

Expand Down Expand Up @@ -3361,3 +3361,17 @@ def _random_state(state=None):
return np.random.RandomState()
else:
raise ValueError("random_state must be an integer, a numpy RandomState, or None")

def _dict_compat(d):
    """
    Helper function to convert datetimelike-keyed dicts to
    Timestamp-keyed dicts.

    Every key is passed through ``_maybe_box_datetimelike``; values are
    carried over unchanged.

    Parameters
    ----------
    d : dict-like object

    Returns
    -------
    dict
    """
    boxed = {}
    for key, value in iteritems(d):
        boxed[_maybe_box_datetimelike(key)] = value
    return boxed
13 changes: 4 additions & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
_infer_dtype_from_scalar, _values_from_object,
is_list_like, _maybe_box_datetimelike,
is_categorical_dtype, is_object_dtype,
_possibly_infer_to_datetimelike)
_possibly_infer_to_datetimelike, _dict_compat)
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.indexing import (maybe_droplevels,
Expand Down Expand Up @@ -5099,14 +5099,9 @@ def _homogenize(data, index, dtype=None):
v = v.reindex(index, copy=False)
else:
if isinstance(v, dict):
if oindex is None:
oindex = index.astype('O')
if type(v) == dict:
# fast cython method
v = lib.fast_multiget(v, oindex.values, default=NA)
else:
v = lib.map_infer(oindex.values, v.get)

v = _dict_compat(v)
oindex = index.astype('O')
v = lib.fast_multiget(v, oindex.values, default=NA)
v = _sanitize_array(v, index, dtype=dtype, copy=False,
raise_cast_failure=False)

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
_possibly_convert_platform, _try_sort,
ABCSparseArray, _maybe_match_name,
_coerce_to_dtype, SettingWithCopyError,
_maybe_box_datetimelike, ABCDataFrame)
_maybe_box_datetimelike, ABCDataFrame,
_dict_compat)
from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
_ensure_index)
from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
Expand Down Expand Up @@ -168,6 +169,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
try:
if isinstance(index, DatetimeIndex):
# coerce back to datetime objects for lookup
data = _dict_compat(data)
data = lib.fast_multiget(data, index.astype('O'),
default=np.nan)
elif isinstance(index, PeriodIndex):
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1018,6 +1018,16 @@ def test_maybe_convert_string_to_array(self):
self.assertTrue(result.dtype == object)


def test_dict_compat():
    # datetime64 keys should come back boxed as Timestamp keys; dicts that
    # are already Timestamp-keyed or not datetime-like compare equal to
    # their input.
    stamp_keyed = {Timestamp('1990-3-15'): 1, Timestamp('2015-03-15'): 2}
    dt64_keyed = {np.datetime64('1990-03-15'): 1,
                  np.datetime64('2015-03-15'): 2}
    plain_keyed = {1: 2, 3: 4, 5: 6}

    assert com._dict_compat(dt64_keyed) == stamp_keyed
    assert com._dict_compat(stamp_keyed) == stamp_keyed
    assert com._dict_compat(plain_keyed) == plain_keyed


if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
25 changes: 25 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2960,6 +2960,31 @@ def test_constructor_dict_multiindex(self):
df = df.reindex(columns=expected.columns, index=expected.index)
check(df, expected)

def test_constructor_dict_datetime64_index(self):
# GH 10160
dates_as_str = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15']

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add the issue number as a comment here

def create_data(constructor):
return dict((i, {constructor(s): 2*i}) for i, s in enumerate(dates_as_str))

data_datetime64 = create_data(np.datetime64)
data_datetime = create_data(lambda x: datetime.strptime(x, '%Y-%m-%d'))
data_Timestamp = create_data(Timestamp)

expected = DataFrame([{0: 0, 1: None, 2: None, 3: None},
{0: None, 1: 2, 2: None, 3: None},
{0: None, 1: None, 2: 4, 3: None},
{0: None, 1: None, 2: None, 3: 6}],
index=[Timestamp(dt) for dt in dates_as_str])

result_datetime64 = DataFrame(data_datetime64)
result_datetime = DataFrame(data_datetime)
result_Timestamp = DataFrame(data_Timestamp)
assert_frame_equal(result_datetime64, expected)
assert_frame_equal(result_datetime, expected)
assert_frame_equal(result_Timestamp, expected)


def _check_basic_constructor(self, empty):
"mat: 2d matrix with shpae (3, 2) to input. empty - makes sized objects"
mat = empty((2, 3), dtype=float)
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,6 +980,29 @@ def test_constructor_subclass_dict(self):
refseries = Series(dict(compat.iteritems(data)))
assert_series_equal(refseries, series)

def test_constructor_dict_datetime64_index(self):
    # GH 9456
    # A dict keyed by np.datetime64, datetime or Timestamp objects must
    # produce the same Series as one built with an explicit Timestamp index.

    dates_as_str = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15']
    values = [42544017.198965244, 1234565, 40512335.181958228, -1]

    # one key constructor per supported datetime-like flavour
    key_makers = (np.datetime64,
                  lambda x: datetime.strptime(x, '%Y-%m-%d'),
                  Timestamp)
    inputs = []
    for make_key in key_makers:
        keys = (make_key(x) for x in dates_as_str)
        inputs.append(dict(zip(keys, values)))

    expected = Series(values, (Timestamp(x) for x in dates_as_str))

    # build every result before asserting, as the three-variable form did
    results = [Series(data) for data in inputs]
    for result in results:
        assert_series_equal(result, expected)

def test_orderedDict_ctor(self):
# GH3283
import pandas
Expand Down