diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
index b1387e9717079..a5e34b2c3c1f6 100644
--- a/doc/source/whatsnew/v2.0.0.rst
+++ b/doc/source/whatsnew/v2.0.0.rst
@@ -913,6 +913,7 @@ I/O
 - Bug in displaying ``string`` dtypes not showing storage option (:issue:`50099`)
 - Bug in :meth:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`)
 - Bug in :meth:`DataFrame.to_string` ignoring float formatter for extension arrays (:issue:`39336`)
+- Bug in :meth:`DataFrame.to_dict` not converting elements to native Python types for masked arrays (:issue:`34665`)
 - Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
 - Fixed issue where :func:`json_normalize` would incorrectly remove leading characters from column names that matched the ``sep`` argument (:issue:`49861`)
 - Bug in :meth:`DataFrame.to_json` where it would segfault when failing to encode a string (:issue:`50307`)
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index ec3fdc6db9dc9..f467eb39f842e 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -250,15 +250,16 @@ def __setitem__(self, key, value) -> None:
     def __iter__(self) -> Iterator:
         if self.ndim == 1:
             if not self._hasna:
                 for val in self._data:
-                    yield val
+                    # .item() unboxes the NumPy scalar into a native Python object
+                    yield val.item()
             else:
                 na_value = self.dtype.na_value
                 for isna_, val in zip(self._mask, self._data):
                     if isna_:
                         yield na_value
                     else:
-                        yield val
+                        yield val.item()
         else:
             for i in range(len(self)):
                 yield self[i]
diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py
index c76699cafd481..1c0f698b1d039 100644
--- a/pandas/tests/frame/methods/test_to_dict.py
+++ b/pandas/tests/frame/methods/test_to_dict.py
@@ -9,6 +9,7 @@
 import pytz
 
 from pandas import (
+    NA,
     DataFrame,
     Index,
     MultiIndex,
@@ -458,3 +459,15 @@ def test_to_dict_index_false(self, orient, expected):
         df = DataFrame({"col1": [1, 2], "col2": [3, 4]}, index=["row1", "row2"])
         result = df.to_dict(orient=orient, index=False)
         tm.assert_dict_equal(result, expected)
+
+    def test_to_dict_masked_native_python(self):
+        # GH#34665
+        df = DataFrame({"a": Series([1, 2], dtype="Int64"), "B": 1})
+        result = df.to_dict(orient="records")
+        assert type(result[0]["a"]) is int
+
+        # with a missing value: valid entries unbox to int, the masked one stays NA
+        df = DataFrame({"a": Series([1, NA], dtype="Int64"), "B": 1})
+        result = df.to_dict(orient="records")
+        assert type(result[0]["a"]) is int
+        assert result[1]["a"] is NA