Skip to content

Commit 821542f

Browse files
kawochen authored and jreback committed
BUG: #9456 Series construction from dict with datetime64 keys
BUG: #10160 DataFrame construction from nested dict with datetime64 index
1 parent 07ea11c commit 821542f

File tree

7 files changed

+85
-14
lines changed

7 files changed

+85
-14
lines changed

doc/source/whatsnew/v0.16.2.txt

+4-2
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,8 @@ Bug Fixes
149149
- Bug in ``GroupBy.get_group`` when grouping on multiple keys, one of which is categorical. (:issue:`10132`)
150150

151151
- Bug in ``DatetimeIndex`` and ``TimedeltaIndex`` where names are lost after timedelta arithmetic (:issue:`9926`)
152-
153-
152+
- Bug in ``DataFrame`` construction from nested ``dict`` with ``datetime64`` index (:issue:`10160`)
153+
- Bug in ``Series`` construction from ``dict`` with ``datetime64`` keys (:issue:`9456`)
154154
- Bug in ``Series.plot(label="LABEL")`` not correctly setting the label (:issue:`10119`)
155155

156156
- Bug in ``plot`` not defaulting to matplotlib ``axes.grid`` setting (:issue:`9792`)
@@ -170,5 +170,7 @@ Bug Fixes
170170
- Bug where infer_freq infers timerule (WOM-5XXX) unsupported by to_offset (:issue:`9425`)
171171
- Bug in ``DataFrame.to_hdf()`` where table format would raise a seemingly unrelated error for invalid (non-string) column names. This is now explicitly forbidden. (:issue:`9057`)
172172
- Bug in handling masking of an empty ``DataFrame`` (:issue:`10126`)
173+
173174
- Bug where MySQL interface could not handle numeric table/column names (:issue:`10255`)
175+
174176
- Bug in ``read_csv`` with a ``date_parser`` that returned a ``datetime64`` array of other time resolution than ``[ns]`` (:issue:`10245`).

pandas/core/common.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
import pandas.lib as lib
2020
import pandas.tslib as tslib
2121
from pandas import compat
22-
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types
22+
from pandas.compat import StringIO, BytesIO, range, long, u, zip, map, string_types, iteritems
2323

2424
from pandas.core.config import get_option
2525

@@ -3028,14 +3028,28 @@ def _where_compat(mask, arr1, arr2):
30283028

30293029
return np.where(mask, arr1, arr2)
30303030

3031+
def _dict_compat(d):
    """
    Build a plain dict from ``d`` with every key passed through
    ``_maybe_box_datetimelike`` (e.g. ``np.datetime64`` keys become
    ``Timestamp`` keys); values are carried over untouched.

    Parameters
    ----------
    d : dict-like object
        Mapping to convert; its items are read through ``iteritems``.

    Returns
    -------
    dict
        New dict holding the same values under the (possibly boxed) keys.
    """
    boxed = {}
    for key, value in iteritems(d):
        # keys that box to the same object collapse, last value wins
        boxed[_maybe_box_datetimelike(key)] = value
    return boxed
30313045

30323046
def sentinel_factory():
    """
    Build and return an instance of a freshly defined ``Sentinel`` class.

    The class statement runs on every call, so each returned object has
    its own distinct type.
    """
    class Sentinel(object):
        pass

    marker = Sentinel()
    return marker
30373052

3038-
30393053
def in_interactive_session():
30403054
""" check if we're running in an interactive shell
30413055

pandas/core/frame.py

+4-9
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
_infer_dtype_from_scalar, _values_from_object,
2929
is_list_like, _maybe_box_datetimelike,
3030
is_categorical_dtype, is_object_dtype,
31-
_possibly_infer_to_datetimelike)
31+
_possibly_infer_to_datetimelike, _dict_compat)
3232
from pandas.core.generic import NDFrame, _shared_docs
3333
from pandas.core.index import Index, MultiIndex, _ensure_index
3434
from pandas.core.indexing import (maybe_droplevels,
@@ -5099,14 +5099,9 @@ def _homogenize(data, index, dtype=None):
50995099
v = v.reindex(index, copy=False)
51005100
else:
51015101
if isinstance(v, dict):
5102-
if oindex is None:
5103-
oindex = index.astype('O')
5104-
if type(v) == dict:
5105-
# fast cython method
5106-
v = lib.fast_multiget(v, oindex.values, default=NA)
5107-
else:
5108-
v = lib.map_infer(oindex.values, v.get)
5109-
5102+
v = _dict_compat(v)
5103+
oindex = index.astype('O')
5104+
v = lib.fast_multiget(v, oindex.values, default=NA)
51105105
v = _sanitize_array(v, index, dtype=dtype, copy=False,
51115106
raise_cast_failure=False)
51125107

pandas/core/series.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121
_possibly_convert_platform, _try_sort,
2222
ABCSparseArray, _maybe_match_name,
2323
_coerce_to_dtype, SettingWithCopyError,
24-
_maybe_box_datetimelike, ABCDataFrame)
24+
_maybe_box_datetimelike, ABCDataFrame,
25+
_dict_compat)
2526
from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
2627
_ensure_index)
2728
from pandas.core.indexing import check_bool_indexer, maybe_convert_indices
@@ -168,6 +169,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
168169
try:
169170
if isinstance(index, DatetimeIndex):
170171
# coerce back to datetime objects for lookup
172+
data = _dict_compat(data)
171173
data = lib.fast_multiget(data, index.astype('O'),
172174
default=np.nan)
173175
elif isinstance(index, PeriodIndex):

pandas/tests/test_common.py

+10
Original file line numberDiff line numberDiff line change
@@ -1018,6 +1018,16 @@ def test_maybe_convert_string_to_array(self):
10181018
self.assertTrue(result.dtype == object)
10191019

10201020

1021+
def test_dict_compat():
    """
    ``com._dict_compat`` must box ``np.datetime64`` keys into ``Timestamp``
    keys and return an equal dict for inputs that need no boxing.
    """
    data_datetime64 = {np.datetime64('1990-03-15'): 1,
                       np.datetime64('2015-03-15'): 2}
    data_unchanged = {1: 2, 3: 4, 5: 6}
    expected = {Timestamp('1990-3-15'): 1, Timestamp('2015-03-15'): 2}

    # (input, wanted) pairs, checked in the same order as before
    cases = [
        (data_datetime64, expected),
        (expected, expected),
        (data_unchanged, data_unchanged),
    ]
    for given, wanted in cases:
        assert com._dict_compat(given) == wanted
1029+
1030+
10211031
if __name__ == '__main__':
10221032
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
10231033
exit=False)

pandas/tests/test_frame.py

+25
Original file line numberDiff line numberDiff line change
@@ -2960,6 +2960,31 @@ def test_constructor_dict_multiindex(self):
29602960
df = df.reindex(columns=expected.columns, index=expected.index)
29612961
check(df, expected)
29622962

2963+
def test_constructor_dict_datetime64_index(self):
    """
    Nested dicts keyed by ``np.datetime64``, ``datetime`` or ``Timestamp``
    must all build the same ``DataFrame`` with a ``Timestamp`` index.
    """
    # GH 10160
    dates_as_str = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15']

    # one key factory per datetimelike flavour under test
    key_factories = [np.datetime64,
                     lambda x: datetime.strptime(x, '%Y-%m-%d'),
                     Timestamp]

    def create_data(constructor):
        # column i holds the single value 2*i at the i-th date
        nested = {}
        for i, s in enumerate(dates_as_str):
            nested[i] = {constructor(s): 2*i}
        return nested

    inputs = [create_data(factory) for factory in key_factories]

    expected = DataFrame([{0: 0, 1: None, 2: None, 3: None},
                          {0: None, 1: 2, 2: None, 3: None},
                          {0: None, 1: None, 2: 4, 3: None},
                          {0: None, 1: None, 2: None, 3: 6}],
                         index=[Timestamp(dt) for dt in dates_as_str])

    # build every frame first, then compare, as the original did
    results = [DataFrame(data) for data in inputs]
    for result in results:
        assert_frame_equal(result, expected)
2986+
2987+
29632988
def _check_basic_constructor(self, empty):
29642989
"mat: 2d matrix with shpae (3, 2) to input. empty - makes sized objects"
29652990
mat = empty((2, 3), dtype=float)

pandas/tests/test_series.py

+23
Original file line numberDiff line numberDiff line change
@@ -980,6 +980,29 @@ def test_constructor_subclass_dict(self):
980980
refseries = Series(dict(compat.iteritems(data)))
981981
assert_series_equal(refseries, series)
982982

983+
def test_constructor_dict_datetime64_index(self):
    """
    Dicts keyed by ``np.datetime64``, ``datetime`` or ``Timestamp`` must
    all build the same ``Series`` with a ``Timestamp`` index.
    """
    # GH 9456
    dates_as_str = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15']
    values = [42544017.198965244, 1234565, 40512335.181958228, -1]

    # one key factory per datetimelike flavour under test
    key_factories = [np.datetime64,
                     lambda x: datetime.strptime(x, '%Y-%m-%d'),
                     Timestamp]

    def create_data(constructor):
        # pair each converted date with the value at the same position
        keys = (constructor(x) for x in dates_as_str)
        return dict(zip(keys, values))

    inputs = [create_data(factory) for factory in key_factories]

    expected = Series(values, (Timestamp(x) for x in dates_as_str))

    # build every series first, then compare, as the original did
    results = [Series(data) for data in inputs]
    for result in results:
        assert_series_equal(result, expected)
1005+
9831006
def test_orderedDict_ctor(self):
9841007
# GH3283
9851008
import pandas

0 commit comments

Comments
 (0)