From 2763717b04f802cd84d7e278fb3ccafaef0d46b6 Mon Sep 17 00:00:00 2001
From: "a.bogachev" <a.bogachev@criteo.com>
Date: Sun, 25 Nov 2018 18:52:35 +0100
Subject: [PATCH 1/6] BUG: return Python scalars from DataFrame.to_dict(orient='records')

---
 pandas/core/frame.py                  |  7 ++++---
 pandas/tests/frame/test_convert_to.py | 28 +++++++++++++++++++++------
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5104cf815abf6..9fa9cd1721e29 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1227,9 +1227,10 @@ def to_dict(self, orient='dict', into=dict):
             return into_c((k, com.maybe_box_datetimelike(v))
                           for k, v in compat.iteritems(self))
         elif orient.lower().startswith('r'):
-            return [into_c((k, com.maybe_box_datetimelike(v))
-                           for k, v in zip(self.columns, np.atleast_1d(row)))
-                    for row in self.values]
+            return [
+                into_c((k, com.maybe_box_datetimelike(v))
+                           for k, v in compat.iteritems(row._asdict()))
+                    for row in self.itertuples(index=False)]
         elif orient.lower().startswith('i'):
             if not self.index.is_unique:
                 raise ValueError(
diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py
index 61fe9d12c173c..76c392b68ece3 100644
--- a/pandas/tests/frame/test_convert_to.py
+++ b/pandas/tests/frame/test_convert_to.py
@@ -282,16 +282,32 @@ def test_to_records_datetimeindex_with_tz(self, tz):
         tm.assert_numpy_array_equal(result, expected)
 
     def test_to_dict_box_scalars(self):
-        # 14216
+        # 14216, 23753
         # make sure that we are boxing properly
-        d = {'a': [1], 'b': ['b']}
+        df =  DataFrame({'a': [1, 2], 'b': [.1, .2]})
 
-        result = DataFrame(d).to_dict()
-        assert isinstance(list(result['a'])[0], (int, long))
-        assert isinstance(list(result['b'])[0], (int, long))
+        result = df.to_dict()
+        assert isinstance(result['a'][0], (int, long))
+        assert isinstance(result['b'][0], float)
 
-        result = DataFrame(d).to_dict(orient='records')
+        result = df.to_dict(orient='records')
+        assert isinstance(result[0]['a'], (int, long))
+        assert isinstance(result[0]['b'], float)
+
+        result = df.to_dict(orient='list')
+        assert isinstance(result['a'][0], (int, long))
+        assert isinstance(result['b'][0], float)
+
+        result = df.to_dict(orient='split')
+        assert isinstance(result['data'][0][0], (int, long))
+        assert isinstance(result['data'][0][1], float)
+
+        result = df.to_dict(orient='index')
         assert isinstance(result[0]['a'], (int, long))
+        assert isinstance(result[0]['b'], float)
+
+
+
 
     def test_frame_to_dict_tz(self):
         # GH18372 When converting to dict with orient='records' columns of

From e872f5045291587471bd88a1a22f40c76f8f0c1c Mon Sep 17 00:00:00 2001
From: "a.bogachev" <a.bogachev@criteo.com>
Date: Sun, 25 Nov 2018 19:56:40 +0100
Subject: [PATCH 2/6] tests for all meaningful orientations

---
 pandas/core/frame.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 9fa9cd1721e29..8d7ba10ad795f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1220,9 +1220,9 @@ def to_dict(self, orient='dict', into=dict):
         elif orient.lower().startswith('sp'):
             return into_c((('index', self.index.tolist()),
                            ('columns', self.columns.tolist()),
-                           ('data', lib.map_infer(self.values.ravel(),
-                                                  com.maybe_box_datetimelike)
-                            .reshape(self.values.shape).tolist())))
+                           ('data', [
+                               [com.maybe_box_datetimelike(v) for v in t] for t in self.itertuples(index=False)]
+                            )))
         elif orient.lower().startswith('s'):
             return into_c((k, com.maybe_box_datetimelike(v))
                           for k, v in compat.iteritems(self))

From 20c0d6658a7e15b9b56e9bca6f77ea8ea25eb4a7 Mon Sep 17 00:00:00 2001
From: "a.bogachev" <a.bogachev@criteo.com>
Date: Mon, 26 Nov 2018 10:37:33 +0100
Subject: [PATCH 3/6] add whatsnew

---
 doc/source/whatsnew/v0.23.5.txt       |  2 +-
 pandas/core/frame.py                  | 17 +++++++++++------
 pandas/tests/frame/test_convert_to.py |  9 +++------
 3 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.5.txt b/doc/source/whatsnew/v0.23.5.txt
index 8f4b1a13c2e9d..c1716ff59c171 100644
--- a/doc/source/whatsnew/v0.23.5.txt
+++ b/doc/source/whatsnew/v0.23.5.txt
@@ -42,7 +42,6 @@ Bug Fixes
 **Groupby/Resample/Rolling**
 
 - Bug in :meth:`DataFrame.resample` when resampling ``NaT`` in ``TimeDeltaIndex`` (:issue:`13223`).
--
 
 **Missing**
 
@@ -52,3 +51,4 @@ Bug Fixes
 **I/O**
 
 - Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`)
+- Bug in :meth:`DataFrame.to_dict` when the result dict contains non-Python scalars (:issue:`23753`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8d7ba10ad795f..e5bb9239617c7 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -280,6 +280,7 @@
     Index(['value'], dtype='object')
 """
 
+
 # -----------------------------------------------------------------------
 # DataFrame class
 
@@ -1221,7 +1222,8 @@ def to_dict(self, orient='dict', into=dict):
             return into_c((('index', self.index.tolist()),
                            ('columns', self.columns.tolist()),
                            ('data', [
-                               [com.maybe_box_datetimelike(v) for v in t] for t in self.itertuples(index=False)]
+                               list(map(com.maybe_box_datetimelike, t))
+                               for t in self.itertuples(index=False)]
                             )))
         elif orient.lower().startswith('s'):
             return into_c((k, com.maybe_box_datetimelike(v))
@@ -1229,8 +1231,8 @@ def to_dict(self, orient='dict', into=dict):
         elif orient.lower().startswith('r'):
             return [
                 into_c((k, com.maybe_box_datetimelike(v))
-                           for k, v in compat.iteritems(row._asdict()))
-                    for row in self.itertuples(index=False)]
+                       for k, v in compat.iteritems(row._asdict()))
+                for row in self.itertuples(index=False)]
         elif orient.lower().startswith('i'):
             if not self.index.is_unique:
                 raise ValueError(
@@ -2654,6 +2656,7 @@ def _get_value(self, index, col, takeable=False):
             col = self.columns.get_loc(col)
             index = self.index.get_loc(index)
             return self._get_value(index, col, takeable=True)
+
     _get_value.__doc__ = get_value.__doc__
 
     def set_value(self, index, col, value, takeable=False):
@@ -2698,6 +2701,7 @@ def _set_value(self, index, col, value, takeable=False):
             self._item_cache.pop(col, None)
 
             return self
+
     _set_value.__doc__ = set_value.__doc__
 
     def _ixs(self, i, axis=0):
@@ -3161,6 +3165,7 @@ def select_dtypes(self, include=None, exclude=None):
         4   True  1.0
         5  False  2.0
         """
+
         def _get_info_slice(obj, indexer):
             """Slice the info axis of `obj` with `indexer`."""
             if not hasattr(obj, '_info_axis_number'):
@@ -6045,9 +6050,9 @@ def diff(self, periods=1, axis=0):
     # Function application
 
     def _gotitem(self,
-                 key,           # type: Union[str, List[str]]
-                 ndim,          # type: int
-                 subset=None    # type: Union[Series, DataFrame, None]
+                 key,  # type: Union[str, List[str]]
+                 ndim,  # type: int
+                 subset=None  # type: Union[Series, DataFrame, None]
                  ):
         # type: (...) -> Union[Series, DataFrame]
         """
diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py
index 76c392b68ece3..b6ede3972a1c7 100644
--- a/pandas/tests/frame/test_convert_to.py
+++ b/pandas/tests/frame/test_convert_to.py
@@ -150,7 +150,7 @@ def test_to_records_index_name(self):
     def test_to_records_with_unicode_index(self):
         # GH13172
         # unicode_literals conflict with to_records
-        result = DataFrame([{u'a': u'x', u'b': 'y'}]).set_index(u'a')\
+        result = DataFrame([{u'a': u'x', u'b': 'y'}]).set_index(u'a') \
             .to_records()
         expected = np.rec.array([('x', 'y')], dtype=[('a', 'O'), ('b', 'O')])
         tm.assert_almost_equal(result, expected)
@@ -284,7 +284,7 @@ def test_to_records_datetimeindex_with_tz(self, tz):
     def test_to_dict_box_scalars(self):
         # 14216, 23753
         # make sure that we are boxing properly
-        df =  DataFrame({'a': [1, 2], 'b': [.1, .2]})
+        df = DataFrame({'a': [1, 2], 'b': [.1, .2]})
 
         result = df.to_dict()
         assert isinstance(result['a'][0], (int, long))
@@ -306,14 +306,11 @@ def test_to_dict_box_scalars(self):
         assert isinstance(result[0]['a'], (int, long))
         assert isinstance(result[0]['b'], float)
 
-
-
-
     def test_frame_to_dict_tz(self):
         # GH18372 When converting to dict with orient='records' columns of
         # datetime that are tz-aware were not converted to required arrays
         data = [(datetime(2017, 11, 18, 21, 53, 0, 219225, tzinfo=pytz.utc),),
-                (datetime(2017, 11, 18, 22, 6, 30, 61810, tzinfo=pytz.utc,),)]
+                (datetime(2017, 11, 18, 22, 6, 30, 61810, tzinfo=pytz.utc, ),)]
         df = DataFrame(list(data), columns=["d", ])
 
         result = df.to_dict(orient='records')

From dd5313b70fb339f1041e6b9ea6b2d641fea803c9 Mon Sep 17 00:00:00 2001
From: "a.bogachev" <a.bogachev@criteo.com>
Date: Mon, 26 Nov 2018 20:53:32 +0100
Subject: [PATCH 4/6] Fixes: formatting + test parametrization

---
 doc/source/whatsnew/v0.23.5.txt       |  1 -
 doc/source/whatsnew/v0.24.0.rst       |  2 ++
 pandas/core/frame.py                  | 10 +++-----
 pandas/tests/frame/test_convert_to.py | 34 ++++++++++-----------------
 4 files changed, 17 insertions(+), 30 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.5.txt b/doc/source/whatsnew/v0.23.5.txt
index c1716ff59c171..250f56316b594 100644
--- a/doc/source/whatsnew/v0.23.5.txt
+++ b/doc/source/whatsnew/v0.23.5.txt
@@ -51,4 +51,3 @@ Bug Fixes
 **I/O**
 
 - Bug in :func:`read_csv` that caused it to raise ``OverflowError`` when trying to use 'inf' as ``na_value`` with integer index column (:issue:`17128`)
-- Bug in :meth:`DataFrame.to_dict` when the result dict contains non-Python scalars (:issue:`23753`)
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 4ff3cc728f7f7..6ee1a8f93a3ea 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1334,6 +1334,8 @@ MultiIndex
 I/O
 ^^^
 
+- Bug in :meth:`DataFrame.to_dict` when the resulting dict contains non-Python scalars in the case of numeric data (:issue:`23753`)
+
 .. _whatsnew_0240.bug_fixes.nan_with_str_dtype:
 
 Proper handling of `np.NaN` in a string data-typed column with the Python engine
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e5bb9239617c7..2caec0f6a6c4d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -280,7 +280,6 @@
     Index(['value'], dtype='object')
 """
 
-
 # -----------------------------------------------------------------------
 # DataFrame class
 
@@ -2656,7 +2655,6 @@ def _get_value(self, index, col, takeable=False):
             col = self.columns.get_loc(col)
             index = self.index.get_loc(index)
             return self._get_value(index, col, takeable=True)
-
     _get_value.__doc__ = get_value.__doc__
 
     def set_value(self, index, col, value, takeable=False):
@@ -2701,7 +2699,6 @@ def _set_value(self, index, col, value, takeable=False):
             self._item_cache.pop(col, None)
 
             return self
-
     _set_value.__doc__ = set_value.__doc__
 
     def _ixs(self, i, axis=0):
@@ -3165,7 +3162,6 @@ def select_dtypes(self, include=None, exclude=None):
         4   True  1.0
         5  False  2.0
         """
-
         def _get_info_slice(obj, indexer):
             """Slice the info axis of `obj` with `indexer`."""
             if not hasattr(obj, '_info_axis_number'):
@@ -6050,9 +6046,9 @@ def diff(self, periods=1, axis=0):
     # Function application
 
     def _gotitem(self,
-                 key,  # type: Union[str, List[str]]
-                 ndim,  # type: int
-                 subset=None  # type: Union[Series, DataFrame, None]
+                 key,           # type: Union[str, List[str]]
+                 ndim,          # type: int
+                 subset=None    # type: Union[Series, DataFrame, None]
                  ):
         # type: (...) -> Union[Series, DataFrame]
         """
diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py
index b6ede3972a1c7..24b7a401f15d1 100644
--- a/pandas/tests/frame/test_convert_to.py
+++ b/pandas/tests/frame/test_convert_to.py
@@ -281,36 +281,26 @@ def test_to_records_datetimeindex_with_tz(self, tz):
         # both converted to UTC, so they are equal
         tm.assert_numpy_array_equal(result, expected)
 
-    def test_to_dict_box_scalars(self):
+    @pytest.mark.parametrize('orient,item_getter',
+                             [('dict', lambda d, col, idx: d[col][idx]),
+                              ('records', lambda d, col, idx: d[idx][col]),
+                              ('list', lambda d, col, idx: d[col][idx]),
+                              ('split', lambda d, col, idx: d['data'][idx][d['columns'].index(col)]),
+                              ('index', lambda d, col, idx: d[idx][col])
+    ])
+    def test_to_dict_box_scalars(self, orient, item_getter):
         # 14216, 23753
         # make sure that we are boxing properly
         df = DataFrame({'a': [1, 2], 'b': [.1, .2]})
-
-        result = df.to_dict()
-        assert isinstance(result['a'][0], (int, long))
-        assert isinstance(result['b'][0], float)
-
-        result = df.to_dict(orient='records')
-        assert isinstance(result[0]['a'], (int, long))
-        assert isinstance(result[0]['b'], float)
-
-        result = df.to_dict(orient='list')
-        assert isinstance(result['a'][0], (int, long))
-        assert isinstance(result['b'][0], float)
-
-        result = df.to_dict(orient='split')
-        assert isinstance(result['data'][0][0], (int, long))
-        assert isinstance(result['data'][0][1], float)
-
-        result = df.to_dict(orient='index')
-        assert isinstance(result[0]['a'], (int, long))
-        assert isinstance(result[0]['b'], float)
+        result = df.to_dict(orient=orient)
+        assert isinstance(item_getter(result, 'a', 0), (int, long))
+        assert isinstance(item_getter(result, 'b', 0), float)
 
     def test_frame_to_dict_tz(self):
         # GH18372 When converting to dict with orient='records' columns of
         # datetime that are tz-aware were not converted to required arrays
         data = [(datetime(2017, 11, 18, 21, 53, 0, 219225, tzinfo=pytz.utc),),
-                (datetime(2017, 11, 18, 22, 6, 30, 61810, tzinfo=pytz.utc, ),)]
+                (datetime(2017, 11, 18, 22, 6, 30, 61810, tzinfo=pytz.utc,),)]
         df = DataFrame(list(data), columns=["d", ])
 
         result = df.to_dict(orient='records')

From 0bd960eb579f1120c17b554640e9707f7dab0c7e Mon Sep 17 00:00:00 2001
From: "a.bogachev" <a.bogachev@criteo.com>
Date: Tue, 27 Nov 2018 20:07:55 +0100
Subject: [PATCH 5/6] Another set of fixes

---
 doc/source/whatsnew/v0.23.5.txt       |  1 +
 doc/source/whatsnew/v0.24.0.rst       |  2 +-
 pandas/core/frame.py                  |  9 +++++----
 pandas/tests/frame/test_convert_to.py | 14 ++++++++------
 4 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.5.txt b/doc/source/whatsnew/v0.23.5.txt
index 250f56316b594..8f4b1a13c2e9d 100644
--- a/doc/source/whatsnew/v0.23.5.txt
+++ b/doc/source/whatsnew/v0.23.5.txt
@@ -42,6 +42,7 @@ Bug Fixes
 **Groupby/Resample/Rolling**
 
 - Bug in :meth:`DataFrame.resample` when resampling ``NaT`` in ``TimeDeltaIndex`` (:issue:`13223`).
+-
 
 **Missing**
 
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
index 6ee1a8f93a3ea..823314fe9826e 100644
--- a/doc/source/whatsnew/v0.24.0.rst
+++ b/doc/source/whatsnew/v0.24.0.rst
@@ -1334,7 +1334,6 @@ MultiIndex
 I/O
 ^^^
 
-- Bug in :meth:`DataFrame.to_dict` when the resulting dict contains non-Python scalars in the case of numeric data (:issue:`23753`)
 
 .. _whatsnew_0240.bug_fixes.nan_with_str_dtype:
 
@@ -1397,6 +1396,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form
 - Bug in :meth:`read_excel()` in which extraneous header names were extracted, even though none were specified (:issue:`11733`)
 - Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and parsing index columns anyway (:issue:`20480`)
 - Bug in :meth:`read_excel()` in which ``usecols`` was not being validated for proper column names when passed in as a string (:issue:`20480`)
+- Bug in :meth:`DataFrame.to_dict` in which the resulting dict contained non-Python (NumPy) scalars for numeric data (:issue:`23753`)
 
 Plotting
 ^^^^^^^^
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 2caec0f6a6c4d..d709630aebc60 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1117,6 +1117,7 @@ def from_dict(cls, data, orient='columns', dtype=None, columns=None):
 
         return cls(data, index=index, columns=columns, dtype=dtype)
 
+
     def to_dict(self, orient='dict', into=dict):
         """
         Convert the DataFrame to a dictionary.
@@ -1184,10 +1185,10 @@ def to_dict(self, orient='dict', into=dict):
 
         >>> df.to_dict('split')
         {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
-         'data': [[1.0, 0.5], [2.0, 0.75]]}
+         'data': [[1, 0.5], [2, 0.75]]}
 
         >>> df.to_dict('records')
-        [{'col1': 1.0, 'col2': 0.5}, {'col1': 2.0, 'col2': 0.75}]
+        [{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}]
 
         >>> df.to_dict('index')
         {'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}}
@@ -1203,8 +1204,8 @@ def to_dict(self, orient='dict', into=dict):
 
         >>> dd = defaultdict(list)
         >>> df.to_dict('records', into=dd)
-        [defaultdict(<class 'list'>, {'col1': 1.0, 'col2': 0.5}),
-         defaultdict(<class 'list'>, {'col1': 2.0, 'col2': 0.75})]
+        [defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}),
+         defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})]
         """
         if not self.columns.is_unique:
             warnings.warn("DataFrame columns are not unique, some "
diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py
index 24b7a401f15d1..ea983193d8c11 100644
--- a/pandas/tests/frame/test_convert_to.py
+++ b/pandas/tests/frame/test_convert_to.py
@@ -281,12 +281,14 @@ def test_to_records_datetimeindex_with_tz(self, tz):
         # both converted to UTC, so they are equal
         tm.assert_numpy_array_equal(result, expected)
 
-    @pytest.mark.parametrize('orient,item_getter',
-                             [('dict', lambda d, col, idx: d[col][idx]),
-                              ('records', lambda d, col, idx: d[idx][col]),
-                              ('list', lambda d, col, idx: d[col][idx]),
-                              ('split', lambda d, col, idx: d['data'][idx][d['columns'].index(col)]),
-                              ('index', lambda d, col, idx: d[idx][col])
+    # orient - orient argument to to_dict function
+    # item_getter - function for extracting value from resulting dict using column name and index
+    @pytest.mark.parametrize('orient,item_getter', [
+        ('dict', lambda d, col, idx: d[col][idx]),
+        ('records', lambda d, col, idx: d[idx][col]),
+        ('list', lambda d, col, idx: d[col][idx]),
+        ('split', lambda d, col, idx: d['data'][idx][d['columns'].index(col)]),
+        ('index', lambda d, col, idx: d[idx][col])
     ])
     def test_to_dict_box_scalars(self, orient, item_getter):
         # 14216, 23753

From dd4833f5571293ae219ade4f9a4eadfe6fbd240b Mon Sep 17 00:00:00 2001
From: "a.bogachev" <a.bogachev@criteo.com>
Date: Wed, 28 Nov 2018 16:07:04 +0100
Subject: [PATCH 6/6] fix some linting errors

---
 pandas/core/frame.py                  | 1 -
 pandas/tests/frame/test_convert_to.py | 3 ++-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d709630aebc60..5a31e25cffe95 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1117,7 +1117,6 @@ def from_dict(cls, data, orient='columns', dtype=None, columns=None):
 
         return cls(data, index=index, columns=columns, dtype=dtype)
 
-
     def to_dict(self, orient='dict', into=dict):
         """
         Convert the DataFrame to a dictionary.
diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py
index ea983193d8c11..f1eb6a33eddeb 100644
--- a/pandas/tests/frame/test_convert_to.py
+++ b/pandas/tests/frame/test_convert_to.py
@@ -282,7 +282,8 @@ def test_to_records_datetimeindex_with_tz(self, tz):
         tm.assert_numpy_array_equal(result, expected)
 
     # orient - orient argument to to_dict function
-    # item_getter - function for extracting value from resulting dict using column name and index
+    # item_getter - function for extracting value from
+    # the resulting dict using column name and index
     @pytest.mark.parametrize('orient,item_getter', [
         ('dict', lambda d, col, idx: d[col][idx]),
         ('records', lambda d, col, idx: d[idx][col]),