Skip to content

Commit 23d8c1c

Browse files
authored
BUG: Series.to_dict does not return native Python types (#37648)
1 parent de55c3d commit 23d8c1c

File tree

7 files changed

+122
-28
lines changed

7 files changed

+122
-28
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ Numeric
325325

326326
Conversion
327327
^^^^^^^^^^
328+
- Bug in :meth:`Series.to_dict` and in :meth:`DataFrame.to_dict` with ``orient='records'`` where values were not returned as native Python types (:issue:`25969`)
328329
-
329330
-
330331

pandas/core/dtypes/cast.py

+24
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
is_datetime64_dtype,
7272
is_datetime64_ns_dtype,
7373
is_datetime64tz_dtype,
74+
is_datetime_or_timedelta_dtype,
7475
is_dtype_equal,
7576
is_extension_array_dtype,
7677
is_float,
@@ -170,6 +171,29 @@ def maybe_box_datetimelike(value: Scalar, dtype: Optional[Dtype] = None) -> Scal
170171
return value
171172

172173

174+
def maybe_box_native(value: Scalar) -> Scalar:
    """
    Convert a NumPy scalar to the equivalent native Python / pandas scalar.

    Parameters
    ----------
    value : scalar
        The value to convert.

    Returns
    -------
    scalar
        ``float``, ``int`` or ``bool`` for float, integer and boolean
        scalars respectively; the result of ``maybe_box_datetimelike`` for
        ``np.datetime64`` / ``np.timedelta64`` scalars; any other value is
        returned unchanged.
    """
    # Function-scope import: keeps this function self-contained regardless of
    # which aliases the module defines at top level.
    import numpy as np

    # Plain numeric/boolean scalars are by far the most common inputs when
    # converting a whole frame cell by cell, so test for them first.
    if is_float(value):
        value = float(value)
    elif is_integer(value):
        value = int(value)
    elif is_bool(value):
        value = bool(value)
    elif isinstance(value, (np.datetime64, np.timedelta64)):
        # Explicit type check rather than a dtype-inspection helper: dtype
        # helpers are meant for arrays/dtypes and may try to interpret
        # arbitrary scalars (e.g. strings) as dtype specifications.
        # ``datetime.datetime`` and already-boxed pandas scalars are
        # deliberately not matched here and pass through untouched.
        value = maybe_box_datetimelike(value)
    return value
195+
196+
173197
def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
174198
"""
175199
Convert a Timedelta or Timestamp to timedelta64 or datetime64 for setting

pandas/core/frame.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@
9595
find_common_type,
9696
infer_dtype_from_scalar,
9797
invalidate_string_dtypes,
98-
maybe_box_datetimelike,
98+
maybe_box_native,
9999
maybe_convert_platform,
100100
maybe_downcast_to_dtype,
101101
maybe_infer_to_datetimelike,
@@ -1655,15 +1655,15 @@ def to_dict(self, orient: str = "dict", into=dict):
16551655
(
16561656
"data",
16571657
[
1658-
list(map(maybe_box_datetimelike, t))
1658+
list(map(maybe_box_native, t))
16591659
for t in self.itertuples(index=False, name=None)
16601660
],
16611661
),
16621662
)
16631663
)
16641664

16651665
elif orient == "series":
1666-
return into_c((k, maybe_box_datetimelike(v)) for k, v in self.items())
1666+
return into_c((k, v) for k, v in self.items())
16671667

16681668
elif orient == "records":
16691669
columns = self.columns.tolist()
@@ -1672,8 +1672,7 @@ def to_dict(self, orient: str = "dict", into=dict):
16721672
for row in self.itertuples(index=False, name=None)
16731673
)
16741674
return [
1675-
into_c((k, maybe_box_datetimelike(v)) for k, v in row.items())
1676-
for row in rows
1675+
into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows
16771676
]
16781677

16791678
elif orient == "index":

pandas/core/series.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858

5959
from pandas.core.dtypes.cast import (
6060
convert_dtypes,
61+
maybe_box_native,
6162
maybe_cast_to_extension_array,
6263
validate_numeric_casting,
6364
)
@@ -1591,7 +1592,7 @@ def to_dict(self, into=dict):
15911592
"""
15921593
# GH16122
15931594
into_c = com.standardize_mapping(into)
1594-
return into_c(self.items())
1595+
return into_c((k, maybe_box_native(v)) for k, v in self.items())
15951596

15961597
def to_frame(self, name=None) -> DataFrame:
15971598
"""
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from datetime import datetime
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas.core.dtypes.cast import maybe_box_native
7+
8+
from pandas import (
9+
Interval,
10+
Period,
11+
Timedelta,
12+
Timestamp,
13+
)
14+
15+
16+
@pytest.mark.parametrize(
    "obj,expected_dtype",
    [
        # values that are already native Python objects pass through as-is
        (b"\x00\x10", bytes),
        ("abc", str),
        (True, bool),
        (4, int),
        (454.98, float),
        (datetime(2005, 2, 25), datetime),
        # NumPy numeric / boolean scalars become Python int / float / bool
        (np.uint(4), int),
        (np.int32(-4), int),
        (np.uint8(4), int),
        (np.float16(0.4), float),
        (np.float64(1.4), float),
        (np.bool_(False), bool),
        # NumPy datetimelike scalars are boxed into pandas scalars
        (np.datetime64("2005-02-25"), Timestamp),
        (np.timedelta64(1, "D"), Timedelta),
        # pandas scalars are left untouched
        (Timestamp("2005-02-25"), Timestamp),
        (Timedelta(1, "D"), Timedelta),
        (Interval(0, 1), Interval),
        (Period("4Q2005"), Period),
    ],
)
def test_maybe_box_native(obj, expected_dtype):
    """Check the exact type (not just isinstance) returned by maybe_box_native."""
    boxed_obj = maybe_box_native(obj)
    result_dtype = type(boxed_obj)
    # Identity comparison: a NumPy subclass of a Python type must not pass.
    assert result_dtype is expected_dtype

pandas/tests/frame/methods/test_to_dict.py

+35-22
Original file line numberDiff line numberDiff line change
@@ -263,31 +263,44 @@ def test_to_dict_wide(self):
263263
expected = {f"A_{i:d}": i for i in range(256)}
264264
assert result == expected
265265

266-
def test_to_dict_orient_dtype(self):
267-
# GH22620 & GH21256
268-
269-
df = DataFrame(
270-
{
271-
"bool": [True, True, False],
272-
"datetime": [
266+
@pytest.mark.parametrize(
267+
"data,dtype",
268+
(
269+
([True, True, False], bool),
270+
[
271+
[
273272
datetime(2018, 1, 1),
274273
datetime(2019, 2, 2),
275274
datetime(2020, 3, 3),
276275
],
277-
"float": [1.0, 2.0, 3.0],
278-
"int": [1, 2, 3],
279-
"str": ["X", "Y", "Z"],
280-
}
281-
)
276+
Timestamp,
277+
],
278+
[[1.0, 2.0, 3.0], float],
279+
[[1, 2, 3], int],
280+
[["X", "Y", "Z"], str],
281+
),
282+
)
283+
def test_to_dict_orient_dtype(self, data, dtype):
284+
# GH22620 & GH21256
282285

283-
expected = {
284-
"int": int,
285-
"float": float,
286-
"str": str,
287-
"datetime": Timestamp,
288-
"bool": bool,
289-
}
286+
df = DataFrame({"a": data})
287+
d = df.to_dict(orient="records")
288+
assert all(type(record["a"]) is dtype for record in d)
289+
290+
@pytest.mark.parametrize(
291+
"data,expected_dtype",
292+
(
293+
[np.uint64(2), int],
294+
[np.int64(-9), int],
295+
[np.float64(1.1), float],
296+
[np.bool_(True), bool],
297+
[np.datetime64("2005-02-25"), Timestamp],
298+
),
299+
)
300+
def test_to_dict_scalar_constructor_orient_dtype(self, data, expected_dtype):
301+
# GH22620 & GH21256
290302

291-
for df_dict in df.to_dict("records"):
292-
result = {col: type(df_dict[col]) for col in list(df.columns)}
293-
assert result == expected
303+
df = DataFrame({"a": data}, index=[0])
304+
d = df.to_dict(orient="records")
305+
result = type(d[0]["a"])
306+
assert result is expected_dtype

pandas/tests/series/methods/test_to_dict.py

+16
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import collections
22

3+
import numpy as np
34
import pytest
45

56
from pandas import Series
@@ -20,3 +21,18 @@ def test_to_dict(self, mapping, datetime_series):
2021
from_method = Series(datetime_series.to_dict(collections.Counter))
2122
from_constructor = Series(collections.Counter(datetime_series.items()))
2223
tm.assert_series_equal(from_method, from_constructor)
24+
25+
@pytest.mark.parametrize(
    "data",
    (
        {"a": np.int64(64), "b": 10},
        {"a": np.int64(64), "b": 10, "c": "ABC"},
        {"a": np.uint64(64), "b": 10, "c": "ABC"},
    ),
)
def test_to_dict_return_types(self, data):
    """Integer values in ``Series.to_dict()`` must be Python ``int`` objects."""
    # GH25969
    # The argument is called ``data`` rather than ``input`` so the builtin
    # ``input`` is not shadowed inside the test.
    result = Series(data).to_dict()
    assert isinstance(result["a"], int)
    assert isinstance(result["b"], int)

0 commit comments

Comments
 (0)