From c85122ba921082bdb5d4e7107174204f48697a60 Mon Sep 17 00:00:00 2001 From: Ka Wo Chen Date: Fri, 5 Jun 2015 07:15:05 -0400 Subject: [PATCH] BUG: GH9456/10160 in Series/DataFrame construction from datetime64 dict BUG: GH9456/10160 in Series/DataFrame construction from datetime64 dict Added _dict_compat to deal with datetime64-keyed dict BUG: GH9456 Series construction from dict with datetime64 keys BUG: GH10160 DataFrame construction from nested dict with datetime64 index removed import --- doc/source/whatsnew/v0.16.2.txt | 6 ++++++ pandas/core/common.py | 16 +++++++++++++++- pandas/core/frame.py | 13 ++++--------- pandas/core/series.py | 4 +++- pandas/tests/test_common.py | 10 ++++++++++ pandas/tests/test_frame.py | 25 +++++++++++++++++++++++++ pandas/tests/test_series.py | 23 +++++++++++++++++++++++ 7 files changed, 86 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.16.2.txt b/doc/source/whatsnew/v0.16.2.txt index e9ecdd60d7eed..62030b276523c 100644 --- a/doc/source/whatsnew/v0.16.2.txt +++ b/doc/source/whatsnew/v0.16.2.txt @@ -160,4 +160,10 @@ Bug Fixes - Bug where infer_freq infers timerule (WOM-5XXX) unsupported by to_offset (:issue:`9425`) - Bug in ``DataFrame.to_hdf()`` where table format would raise a seemingly unrelated error for invalid (non-string) column names. This is now explicitly forbidden. 
(:issue:`9057`) - Bug to handle masking empty ``DataFrame``(:issue:`10126`) + - Bug where MySQL interface could not handle numeric table/column names (:issue:`10255`) + +- Bug in ``DataFrame`` construction from nested ``dict`` with ``datetime64`` index (:issue:`10160`) + +- Bug in ``Series`` construction from ``dict`` with ``datetime64`` keys (:issue:`9456`) + diff --git a/pandas/core/common.py b/pandas/core/common.py index 1c9326c047a79..c6393abb2758b 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -19,7 +19,7 @@ import pandas.lib as lib import pandas.tslib as tslib from pandas import compat -from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types +from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types, iteritems from pandas.core.config import get_option @@ -3361,3 +3361,17 @@ def _random_state(state=None): return np.random.RandomState() else: raise ValueError("random_state must be an integer, a numpy RandomState, or None") + +def _dict_compat(d): + """ + Helper function to convert datetimelike-keyed dicts to Timestamp-keyed dict + + Parameters + ---------- + d: dict like object + + Returns + ------- + dict + """ + return dict((_maybe_box_datetimelike(key), value) for key, value in iteritems(d)) \ No newline at end of file diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2b434c98d8482..346b518428d47 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -28,7 +28,7 @@ _infer_dtype_from_scalar, _values_from_object, is_list_like, _maybe_box_datetimelike, is_categorical_dtype, is_object_dtype, - _possibly_infer_to_datetimelike) + _possibly_infer_to_datetimelike, _dict_compat) from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import (maybe_droplevels, @@ -5099,14 +5099,9 @@ def _homogenize(data, index, dtype=None): v = v.reindex(index, copy=False) else: if isinstance(v, dict): - if 
oindex is None: - oindex = index.astype('O') - if type(v) == dict: - # fast cython method - v = lib.fast_multiget(v, oindex.values, default=NA) - else: - v = lib.map_infer(oindex.values, v.get) - + v = _dict_compat(v) + oindex = index.astype('O') + v = lib.fast_multiget(v, oindex.values, default=NA) v = _sanitize_array(v, index, dtype=dtype, copy=False, raise_cast_failure=False) diff --git a/pandas/core/series.py b/pandas/core/series.py index c54bd96f64c73..dfbc5dbf84572 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -21,7 +21,8 @@ _possibly_convert_platform, _try_sort, ABCSparseArray, _maybe_match_name, _coerce_to_dtype, SettingWithCopyError, - _maybe_box_datetimelike, ABCDataFrame) + _maybe_box_datetimelike, ABCDataFrame, + _dict_compat) from pandas.core.index import (Index, MultiIndex, InvalidIndexError, _ensure_index) from pandas.core.indexing import check_bool_indexer, maybe_convert_indices @@ -168,6 +169,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, try: if isinstance(index, DatetimeIndex): # coerce back to datetime objects for lookup + data = _dict_compat(data) data = lib.fast_multiget(data, index.astype('O'), default=np.nan) elif isinstance(index, PeriodIndex): diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index c3d39fcdf906f..9ac7083289461 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -1018,6 +1018,16 @@ def test_maybe_convert_string_to_array(self): self.assertTrue(result.dtype == object) +def test_dict_compat(): + data_datetime64 = {np.datetime64('1990-03-15'): 1, + np.datetime64('2015-03-15'): 2} + data_unchanged = {1: 2, 3: 4, 5: 6} + expected = {Timestamp('1990-3-15'): 1, Timestamp('2015-03-15'): 2} + assert(com._dict_compat(data_datetime64) == expected) + assert(com._dict_compat(expected) == expected) + assert(com._dict_compat(data_unchanged) == data_unchanged) + + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', 
'--pdb-failure'], exit=False) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index e6571e83cc21b..4b1954a3be64e 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2960,6 +2960,31 @@ def test_constructor_dict_multiindex(self): df = df.reindex(columns=expected.columns, index=expected.index) check(df, expected) + def test_constructor_dict_datetime64_index(self): + # GH 10160 + dates_as_str = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15'] + + def create_data(constructor): + return dict((i, {constructor(s): 2*i}) for i, s in enumerate(dates_as_str)) + + data_datetime64 = create_data(np.datetime64) + data_datetime = create_data(lambda x: datetime.strptime(x, '%Y-%m-%d')) + data_Timestamp = create_data(Timestamp) + + expected = DataFrame([{0: 0, 1: None, 2: None, 3: None}, + {0: None, 1: 2, 2: None, 3: None}, + {0: None, 1: None, 2: 4, 3: None}, + {0: None, 1: None, 2: None, 3: 6}], + index=[Timestamp(dt) for dt in dates_as_str]) + + result_datetime64 = DataFrame(data_datetime64) + result_datetime = DataFrame(data_datetime) + result_Timestamp = DataFrame(data_Timestamp) + assert_frame_equal(result_datetime64, expected) + assert_frame_equal(result_datetime, expected) + assert_frame_equal(result_Timestamp, expected) + + def _check_basic_constructor(self, empty): "mat: 2d matrix with shpae (3, 2) to input. 
empty - makes sized objects" mat = empty((2, 3), dtype=float) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index c7a8379c15da6..ff0d5739588f2 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -980,6 +980,29 @@ def test_constructor_subclass_dict(self): refseries = Series(dict(compat.iteritems(data))) assert_series_equal(refseries, series) + def test_constructor_dict_datetime64_index(self): + # GH 9456 + + dates_as_str = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15'] + values = [42544017.198965244, 1234565, 40512335.181958228, -1] + + def create_data(constructor): + return dict(zip((constructor(x) for x in dates_as_str), values)) + + data_datetime64 = create_data(np.datetime64) + data_datetime = create_data(lambda x: datetime.strptime(x, '%Y-%m-%d')) + data_Timestamp = create_data(Timestamp) + + expected = Series(values, (Timestamp(x) for x in dates_as_str)) + + result_datetime64 = Series(data_datetime64) + result_datetime = Series(data_datetime) + result_Timestamp = Series(data_Timestamp) + + assert_series_equal(result_datetime64, expected) + assert_series_equal(result_datetime, expected) + assert_series_equal(result_Timestamp, expected) + def test_orderedDict_ctor(self): # GH3283 import pandas