Skip to content

Commit e0699ca

Browse files
authored
BUG: to_dict should return a native datetime object for NumPy backed dataframes (#37571)
1 parent 0297710 commit e0699ca

File tree

11 files changed

+97
-66
lines changed

11 files changed

+97
-66
lines changed

doc/source/whatsnew/v1.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ Numeric
435435
Conversion
436436
^^^^^^^^^^
437437

438-
-
438+
- Fixed bug in :meth:`DataFrame.to_dict` with ``orient='records'``, which now returns Python native datetime objects for datetime-like columns (:issue:`21256`)
439439
-
440440

441441
Strings

pandas/core/arrays/sparse/array.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
construct_1d_arraylike_from_scalar,
2323
find_common_type,
2424
infer_dtype_from_scalar,
25+
maybe_box_datetimelike,
2526
)
2627
from pandas.core.dtypes.common import (
2728
is_array_like,
@@ -805,7 +806,7 @@ def _get_val_at(self, loc):
805806
return self.fill_value
806807
else:
807808
val = self.sp_values[sp_loc]
808-
val = com.maybe_box_datetimelike(val, self.sp_values.dtype)
809+
val = maybe_box_datetimelike(val, self.sp_values.dtype)
809810
return val
810811

811812
def take(self, indices, allow_fill=False, fill_value=None) -> "SparseArray":

pandas/core/common.py

+1-34
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,14 @@
66

77
from collections import abc, defaultdict
88
import contextlib
9-
from datetime import datetime, timedelta
109
from functools import partial
1110
import inspect
1211
from typing import Any, Collection, Iterable, Iterator, List, Union, cast
1312
import warnings
1413

1514
import numpy as np
1615

17-
from pandas._libs import lib, tslibs
16+
from pandas._libs import lib
1817
from pandas._typing import AnyArrayLike, Scalar, T
1918
from pandas.compat.numpy import np_version_under1p18
2019

@@ -78,21 +77,6 @@ def consensus_name_attr(objs):
7877
return name
7978

8079

81-
def maybe_box_datetimelike(value, dtype=None):
82-
# turn a datetime like into a Timestamp/timedelta as needed
83-
if dtype == object:
84-
# If we don't have datetime64/timedelta64 dtype, we don't want to
85-
# box datetimelike scalars
86-
return value
87-
88-
if isinstance(value, (np.datetime64, datetime)):
89-
value = tslibs.Timestamp(value)
90-
elif isinstance(value, (np.timedelta64, timedelta)):
91-
value = tslibs.Timedelta(value)
92-
93-
return value
94-
95-
9680
def is_bool_indexer(key: Any) -> bool:
9781
"""
9882
Check whether `key` is a valid boolean indexer.
@@ -347,23 +331,6 @@ def apply_if_callable(maybe_callable, obj, **kwargs):
347331
return maybe_callable
348332

349333

350-
def dict_compat(d):
351-
"""
352-
Helper function to convert datetimelike-keyed dicts
353-
to Timestamp-keyed dict.
354-
355-
Parameters
356-
----------
357-
d: dict like object
358-
359-
Returns
360-
-------
361-
dict
362-
363-
"""
364-
return {maybe_box_datetimelike(key): value for key, value in d.items()}
365-
366-
367334
def standardize_mapping(into):
368335
"""
369336
Helper function to standardize a supplied mapping.

pandas/core/dtypes/cast.py

+42-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing import (
88
TYPE_CHECKING,
99
Any,
10+
Dict,
1011
List,
1112
Optional,
1213
Sequence,
@@ -19,7 +20,7 @@
1920

2021
import numpy as np
2122

22-
from pandas._libs import lib, tslib
23+
from pandas._libs import lib, tslib, tslibs
2324
from pandas._libs.tslibs import (
2425
NaT,
2526
OutOfBoundsDatetime,
@@ -134,6 +135,30 @@ def is_nested_object(obj) -> bool:
134135
return False
135136

136137

138+
def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scalar:
139+
"""
140+
Cast scalar to Timestamp or Timedelta if scalar is datetime-like
141+
and dtype is not object.
142+
143+
Parameters
144+
----------
145+
value : scalar
146+
dtype : Dtype, optional
147+
148+
Returns
149+
-------
150+
scalar
151+
"""
152+
if dtype == object:
153+
pass
154+
elif isinstance(value, (np.datetime64, datetime)):
155+
value = tslibs.Timestamp(value)
156+
elif isinstance(value, (np.timedelta64, timedelta)):
157+
value = tslibs.Timedelta(value)
158+
159+
return value
160+
161+
137162
def maybe_downcast_to_dtype(result, dtype: Union[str, np.dtype]):
138163
"""
139164
try to cast to the specified dtype (e.g. convert back to bool/int
@@ -791,6 +816,22 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj,
791816
return dtype, val
792817

793818

819+
def dict_compat(d: Dict[Scalar, Scalar]) -> Dict[Scalar, Scalar]:
820+
"""
821+
Convert datetimelike-keyed dicts to a Timestamp-keyed dict.
822+
823+
Parameters
824+
----------
825+
d : dict-like object
826+
827+
Returns
828+
-------
829+
dict
830+
831+
"""
832+
return {maybe_box_datetimelike(key): value for key, value in d.items()}
833+
834+
794835
def infer_dtype_from_array(
795836
arr, pandas_dtype: bool = False
796837
) -> Tuple[DtypeObj, ArrayLike]:

pandas/core/frame.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@
8484
find_common_type,
8585
infer_dtype_from_scalar,
8686
invalidate_string_dtypes,
87+
maybe_box_datetimelike,
8788
maybe_cast_to_datetime,
8889
maybe_casted_values,
8990
maybe_convert_platform,
@@ -1538,15 +1539,15 @@ def to_dict(self, orient="dict", into=dict):
15381539
(
15391540
"data",
15401541
[
1541-
list(map(com.maybe_box_datetimelike, t))
1542+
list(map(maybe_box_datetimelike, t))
15421543
for t in self.itertuples(index=False, name=None)
15431544
],
15441545
),
15451546
)
15461547
)
15471548

15481549
elif orient == "series":
1549-
return into_c((k, com.maybe_box_datetimelike(v)) for k, v in self.items())
1550+
return into_c((k, maybe_box_datetimelike(v)) for k, v in self.items())
15501551

15511552
elif orient == "records":
15521553
columns = self.columns.tolist()
@@ -1555,7 +1556,7 @@ def to_dict(self, orient="dict", into=dict):
15551556
for row in self.itertuples(index=False, name=None)
15561557
)
15571558
return [
1558-
into_c((k, com.maybe_box_datetimelike(v)) for k, v in row.items())
1559+
into_c((k, maybe_box_datetimelike(v)) for k, v in row.items())
15591560
for row in rows
15601561
]
15611562

pandas/core/indexes/interval.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from pandas.core.dtypes.cast import (
2020
find_common_type,
2121
infer_dtype_from_scalar,
22+
maybe_box_datetimelike,
2223
maybe_downcast_to_dtype,
2324
)
2425
from pandas.core.dtypes.common import (
@@ -1193,8 +1194,8 @@ def interval_range(
11931194
IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
11941195
closed='both', dtype='interval[int64]')
11951196
"""
1196-
start = com.maybe_box_datetimelike(start)
1197-
end = com.maybe_box_datetimelike(end)
1197+
start = maybe_box_datetimelike(start)
1198+
end = maybe_box_datetimelike(end)
11981199
endpoint = start if start is not None else end
11991200

12001201
if freq is None and com.any_none(periods, start, end):

pandas/core/internals/blocks.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
find_common_type,
2020
infer_dtype_from,
2121
infer_dtype_from_scalar,
22+
maybe_box_datetimelike,
2223
maybe_downcast_numeric,
2324
maybe_downcast_to_dtype,
2425
maybe_infer_dtype_type,
@@ -843,7 +844,7 @@ def comp(s: Scalar, mask: np.ndarray, regex: bool = False) -> np.ndarray:
843844
if isna(s):
844845
return ~mask
845846

846-
s = com.maybe_box_datetimelike(s)
847+
s = maybe_box_datetimelike(s)
847848
return compare_or_regex_search(self.values, s, regex, mask)
848849

849850
# Calculate the mask once, prior to the call of comp

pandas/core/internals/construction.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from pandas.core.dtypes.cast import (
1515
construct_1d_arraylike_from_scalar,
1616
construct_1d_ndarray_preserving_na,
17+
dict_compat,
1718
maybe_cast_to_datetime,
1819
maybe_convert_platform,
1920
maybe_infer_to_datetimelike,
@@ -346,7 +347,7 @@ def _homogenize(data, index, dtype: Optional[DtypeObj]):
346347
oindex = index.astype("O")
347348

348349
if isinstance(index, (ABCDatetimeIndex, ABCTimedeltaIndex)):
349-
val = com.dict_compat(val)
350+
val = dict_compat(val)
350351
else:
351352
val = dict(val)
352353
val = lib.fast_multiget(val, oindex._values, default=np.nan)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import numpy as np
2+
3+
from pandas.core.dtypes.cast import dict_compat
4+
5+
from pandas import Timestamp
6+
7+
8+
def test_dict_compat():
9+
data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2}
10+
data_unchanged = {1: 2, 3: 4, 5: 6}
11+
expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2}
12+
assert dict_compat(data_datetime64) == expected
13+
assert dict_compat(expected) == expected
14+
assert dict_compat(data_unchanged) == data_unchanged

pandas/tests/frame/methods/test_to_dict.py

+25-12
Original file line numberDiff line numberDiff line change
@@ -257,17 +257,30 @@ def test_to_dict_wide(self):
257257
assert result == expected
258258

259259
def test_to_dict_orient_dtype(self):
260-
# GH#22620
261-
# Input Data
262-
input_data = {"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["X", "Y", "Z"]}
263-
df = DataFrame(input_data)
264-
# Expected Dtypes
265-
expected = {"a": int, "b": float, "c": str}
266-
# Extracting dtypes out of to_dict operation
267-
for df_dict in df.to_dict("records"):
268-
result = {
269-
"a": type(df_dict["a"]),
270-
"b": type(df_dict["b"]),
271-
"c": type(df_dict["c"]),
260+
# GH22620 & GH21256
261+
262+
df = DataFrame(
263+
{
264+
"bool": [True, True, False],
265+
"datetime": [
266+
datetime(2018, 1, 1),
267+
datetime(2019, 2, 2),
268+
datetime(2020, 3, 3),
269+
],
270+
"float": [1.0, 2.0, 3.0],
271+
"int": [1, 2, 3],
272+
"str": ["X", "Y", "Z"],
272273
}
274+
)
275+
276+
expected = {
277+
"int": int,
278+
"float": float,
279+
"str": str,
280+
"datetime": Timestamp,
281+
"bool": bool,
282+
}
283+
284+
for df_dict in df.to_dict("records"):
285+
result = {col: type(df_dict[col]) for col in list(df.columns)}
273286
assert result == expected

pandas/tests/test_common.py

+1-10
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pandas.compat.numpy import np_version_under1p17
1010

1111
import pandas as pd
12-
from pandas import Series, Timestamp
12+
from pandas import Series
1313
import pandas._testing as tm
1414
from pandas.core import ops
1515
import pandas.core.common as com
@@ -109,15 +109,6 @@ def test_maybe_match_name(left, right, expected):
109109
assert ops.common._maybe_match_name(left, right) == expected
110110

111111

112-
def test_dict_compat():
113-
data_datetime64 = {np.datetime64("1990-03-15"): 1, np.datetime64("2015-03-15"): 2}
114-
data_unchanged = {1: 2, 3: 4, 5: 6}
115-
expected = {Timestamp("1990-3-15"): 1, Timestamp("2015-03-15"): 2}
116-
assert com.dict_compat(data_datetime64) == expected
117-
assert com.dict_compat(expected) == expected
118-
assert com.dict_compat(data_unchanged) == data_unchanged
119-
120-
121112
def test_standardize_mapping():
122113
# No uninitialized defaultdicts
123114
msg = r"to_dict\(\) only accepts initialized defaultdicts"

0 commit comments

Comments
 (0)