diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index d0dddb19f4c93..30be5e496263c 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1342,6 +1342,7 @@ MultiIndex I/O ^^^ + .. _whatsnew_0240.bug_fixes.nan_with_str_dtype: Proper handling of `np.NaN` in a string data-typed column with the Python engine @@ -1404,6 +1405,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :meth:`read_excel()` in which extraneous header names were extracted, even though none were specified (:issue:`11733`) - Bug in :meth:`read_excel()` in which ``index_col=None`` was not being respected and parsing index columns anyway (:issue:`20480`) - Bug in :meth:`read_excel()` in which ``usecols`` was not being validated for proper column names when passed in as a string (:issue:`20480`) +- Bug in :meth:`DataFrame.to_dict` when the resulting dict contains non-Python scalars in the case of numeric data (:issue:`23753`) - :func:`DataFrame.to_string()`, :func:`DataFrame.to_html()`, :func:`DataFrame.to_latex()` will correctly format output when a string is passed as the ``float_format`` argument (:issue:`21625`, :issue:`22270`) Plotting diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 688f0226dcdba..99e6e3c6569ad 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1195,10 +1195,10 @@ def to_dict(self, orient='dict', into=dict): >>> df.to_dict('split') {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'], - 'data': [[1.0, 0.5], [2.0, 0.75]]} + 'data': [[1, 0.5], [2, 0.75]]} >>> df.to_dict('records') - [{'col1': 1.0, 'col2': 0.5}, {'col1': 2.0, 'col2': 0.75}] + [{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}] >>> df.to_dict('index') {'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}} @@ -1214,8 +1214,8 @@ def to_dict(self, orient='dict', into=dict): >>> dd = defaultdict(list) >>> df.to_dict('records', into=dd) - [defaultdict(<class 'list'>, {'col1': 1.0, 
'col2': 0.5}), - defaultdict(<class 'list'>, {'col1': 2.0, 'col2': 0.75})] + [defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}), + defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})] """ if not self.columns.is_unique: warnings.warn("DataFrame columns are not unique, some " @@ -1231,16 +1231,18 @@ def to_dict(self, orient='dict', into=dict): elif orient.lower().startswith('sp'): return into_c((('index', self.index.tolist()), ('columns', self.columns.tolist()), - ('data', lib.map_infer(self.values.ravel(), - com.maybe_box_datetimelike) - .reshape(self.values.shape).tolist()))) + ('data', [ + list(map(com.maybe_box_datetimelike, t)) + for t in self.itertuples(index=False)] + ))) elif orient.lower().startswith('s'): return into_c((k, com.maybe_box_datetimelike(v)) for k, v in compat.iteritems(self)) elif orient.lower().startswith('r'): - return [into_c((k, com.maybe_box_datetimelike(v)) - for k, v in zip(self.columns, np.atleast_1d(row))) - for row in self.values] + return [ + into_c((k, com.maybe_box_datetimelike(v)) + for k, v in compat.iteritems(row._asdict())) + for row in self.itertuples(index=False)] elif orient.lower().startswith('i'): if not self.index.is_unique: raise ValueError( diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 61fe9d12c173c..f1eb6a33eddeb 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -150,7 +150,7 @@ def test_to_records_index_name(self): def test_to_records_with_unicode_index(self): # GH13172 # unicode_literals conflict with to_records - result = DataFrame([{u'a': u'x', u'b': 'y'}]).set_index(u'a')\ + result = DataFrame([{u'a': u'x', u'b': 'y'}]).set_index(u'a') \ .to_records() expected = np.rec.array([('x', 'y')], dtype=[('a', 'O'), ('b', 'O')]) tm.assert_almost_equal(result, expected) @@ -281,17 +281,23 @@ def test_to_records_datetimeindex_with_tz(self, tz): # both converted to UTC, so they are equal tm.assert_numpy_array_equal(result, expected) - def 
test_to_dict_box_scalars(self): - # 14216 + # orient - orient argument to to_dict function + # item_getter - function for extracting value from + # the resulting dict using column name and index + @pytest.mark.parametrize('orient,item_getter', [ + ('dict', lambda d, col, idx: d[col][idx]), + ('records', lambda d, col, idx: d[idx][col]), + ('list', lambda d, col, idx: d[col][idx]), + ('split', lambda d, col, idx: d['data'][idx][d['columns'].index(col)]), + ('index', lambda d, col, idx: d[idx][col]) + ]) + def test_to_dict_box_scalars(self, orient, item_getter): + # 14216, 23753 # make sure that we are boxing properly - d = {'a': [1], 'b': ['b']} - - result = DataFrame(d).to_dict() - assert isinstance(list(result['a'])[0], (int, long)) - assert isinstance(list(result['b'])[0], (int, long)) - - result = DataFrame(d).to_dict(orient='records') - assert isinstance(result[0]['a'], (int, long)) + df = DataFrame({'a': [1, 2], 'b': [.1, .2]}) + result = df.to_dict(orient=orient) + assert isinstance(item_getter(result, 'a', 0), (int, long)) + assert isinstance(item_getter(result, 'b', 0), float) def test_frame_to_dict_tz(self): # GH18372 When converting to dict with orient='records' columns of