Skip to content

Commit b888dad

Browse files
authored
BUG: to_dict not converting NA to None (#50796)
1 parent 0a0372a commit b888dad

File tree

5 files changed

+53
-13
lines changed

5 files changed

+53
-13
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1020,6 +1020,7 @@ I/O
10201020
- Bug in :meth:`DataFrame.to_string` ignoring float formatter for extension arrays (:issue:`39336`)
10211021
- Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
10221022
- Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`)
1023+
- Bug in :meth:`DataFrame.to_dict` not converting ``NA`` to ``None`` (:issue:`50795`)
10231024
- Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`)
10241025

10251026
Period

pandas/core/dtypes/cast.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,10 @@
2020
import numpy as np
2121

2222
from pandas._libs import lib
23+
from pandas._libs.missing import (
24+
NA,
25+
NAType,
26+
)
2327
from pandas._libs.tslibs import (
2428
NaT,
2529
OutOfBoundsDatetime,
@@ -176,7 +180,7 @@ def maybe_box_datetimelike(value: Scalar, dtype: Dtype | None = None) -> Scalar:
176180
return value
177181

178182

179-
def maybe_box_native(value: Scalar) -> Scalar:
183+
def maybe_box_native(value: Scalar | None | NAType) -> Scalar | None | NAType:
180184
"""
181185
If passed a scalar cast the scalar to a python native type.
182186
@@ -202,6 +206,8 @@ def maybe_box_native(value: Scalar) -> Scalar:
202206
value = bool(value)
203207
elif isinstance(value, (np.datetime64, np.timedelta64)):
204208
value = maybe_box_datetimelike(value)
209+
elif value is NA:
210+
value = None
205211
return value
206212

207213

pandas/core/methods/to_dict.py

+16 -11
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,10 @@
66
from pandas.util._exceptions import find_stack_level
77

88
from pandas.core.dtypes.cast import maybe_box_native
9-
from pandas.core.dtypes.common import is_object_dtype
9+
from pandas.core.dtypes.common import (
10+
is_extension_array_dtype,
11+
is_object_dtype,
12+
)
1013

1114
from pandas import DataFrame
1215
from pandas.core import common as com
@@ -88,16 +91,18 @@ def to_dict(
8891
# GH46470 Return quickly if orient series to avoid creating dtype objects
8992
return into_c((k, v) for k, v in df.items())
9093

91-
object_dtype_indices = [
92-
i for i, col_dtype in enumerate(df.dtypes.values) if is_object_dtype(col_dtype)
94+
box_native_indices = [
95+
i
96+
for i, col_dtype in enumerate(df.dtypes.values)
97+
if is_object_dtype(col_dtype) or is_extension_array_dtype(col_dtype)
9398
]
94-
are_all_object_dtype_cols = len(object_dtype_indices) == len(df.dtypes)
99+
are_all_object_dtype_cols = len(box_native_indices) == len(df.dtypes)
95100

96101
if orient == "dict":
97102
return into_c((k, v.to_dict(into)) for k, v in df.items())
98103

99104
elif orient == "list":
100-
object_dtype_indices_as_set = set(object_dtype_indices)
105+
object_dtype_indices_as_set = set(box_native_indices)
101106
return into_c(
102107
(
103108
k,
@@ -110,7 +115,7 @@ def to_dict(
110115

111116
elif orient == "split":
112117
data = df._create_data_for_split_and_tight_to_dict(
113-
are_all_object_dtype_cols, object_dtype_indices
118+
are_all_object_dtype_cols, box_native_indices
114119
)
115120

116121
return into_c(
@@ -123,7 +128,7 @@ def to_dict(
123128

124129
elif orient == "tight":
125130
data = df._create_data_for_split_and_tight_to_dict(
126-
are_all_object_dtype_cols, object_dtype_indices
131+
are_all_object_dtype_cols, box_native_indices
127132
)
128133

129134
return into_c(
@@ -155,8 +160,8 @@ def to_dict(
155160
data = [
156161
into_c(zip(columns, t)) for t in df.itertuples(index=False, name=None)
157162
]
158-
if object_dtype_indices:
159-
object_dtype_indices_as_set = set(object_dtype_indices)
163+
if box_native_indices:
164+
object_dtype_indices_as_set = set(box_native_indices)
160165
object_dtype_cols = {
161166
col
162167
for i, col in enumerate(df.columns)
@@ -176,8 +181,8 @@ def to_dict(
176181
(t[0], dict(zip(df.columns, map(maybe_box_native, t[1:]))))
177182
for t in df.itertuples(name=None)
178183
)
179-
elif object_dtype_indices:
180-
object_dtype_indices_as_set = set(object_dtype_indices)
184+
elif box_native_indices:
185+
object_dtype_indices_as_set = set(box_native_indices)
181186
is_object_dtype_by_index = [
182187
i in object_dtype_indices_as_set for i in range(len(df.columns))
183188
]

pandas/core/series.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -89,6 +89,7 @@
8989
from pandas.core.dtypes.common import (
9090
ensure_platform_int,
9191
is_dict_like,
92+
is_extension_array_dtype,
9293
is_integer,
9394
is_iterator,
9495
is_list_like,
@@ -1832,7 +1833,7 @@ def to_dict(self, into: type[dict] = dict) -> dict:
18321833
# GH16122
18331834
into_c = com.standardize_mapping(into)
18341835

1835-
if is_object_dtype(self):
1836+
if is_object_dtype(self) or is_extension_array_dtype(self):
18361837
return into_c((k, maybe_box_native(v)) for k, v in self.items())
18371838
else:
18381839
# Not an object dtype => all types will be the same so let the default

pandas/tests/frame/methods/test_to_dict.py

+27
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import pytz
1010

1111
from pandas import (
12+
NA,
1213
DataFrame,
1314
Index,
1415
MultiIndex,
@@ -458,3 +459,29 @@ def test_to_dict_index_false(self, orient, expected):
458459
df = DataFrame({"col1": [1, 2], "col2": [3, 4]}, index=["row1", "row2"])
459460
result = df.to_dict(orient=orient, index=False)
460461
tm.assert_dict_equal(result, expected)
462+
463+
@pytest.mark.parametrize(
464+
"orient, expected",
465+
[
466+
("dict", {"a": {0: 1, 1: None}}),
467+
("list", {"a": [1, None]}),
468+
("split", {"index": [0, 1], "columns": ["a"], "data": [[1], [None]]}),
469+
(
470+
"tight",
471+
{
472+
"index": [0, 1],
473+
"columns": ["a"],
474+
"data": [[1], [None]],
475+
"index_names": [None],
476+
"column_names": [None],
477+
},
478+
),
479+
("records", [{"a": 1}, {"a": None}]),
480+
("index", {0: {"a": 1}, 1: {"a": None}}),
481+
],
482+
)
483+
def test_to_dict_na_to_none(self, orient, expected):
484+
# GH#50795
485+
df = DataFrame({"a": [1, NA]}, dtype="Int64")
486+
result = df.to_dict(orient=orient)
487+
assert result == expected

0 commit comments

Comments
 (0)