pandas-dev · jreback · Dec 24, 2014 · Dec 19, 2014 · jreback · Dec 24, 2014
diff --git a/doc/source/whatsnew/v0.16.0.txt b/doc/source/whatsnew/v0.16.0.txt
@@ -28,6 +28,22 @@ Backwards incompatible API changes
 .. _whatsnew_0160.api_breaking:
 
 - ``Index.duplicated`` now returns `np.array(dtype=bool)` rather than `Index(dtype=object)` containing `bool` values. (:issue:`8875`)
+- ``DataFrame.to_json`` now serialises each column of a mixed-dtype frame according to that column's own dtype (:issue:`9037`)
+
+  Previously data was coerced to a common dtype before serialisation, which for
+  example resulted in integers being serialised to floats:
+
+  .. code-block:: python
+
+    In [2]: pd.DataFrame({'i': [1,2], 'f': [3.0, 4.2]}).to_json()
+    Out[2]: '{"f":{"0":3.0,"1":4.2},"i":{"0":1.0,"1":2.0}}'
+
+  Now each column is serialised using its correct dtype:
+
+  .. code-block:: python
+
+    In [2]: pd.DataFrame({'i': [1,2], 'f': [3.0, 4.2]}).to_json()
+    Out[2]: '{"f":{"0":3.0,"1":4.2},"i":{"0":1,"1":2}}'
 
 Deprecations
 ~~~~~~~~~~~~
@@ -46,10 +62,10 @@ Performance
 .. _whatsnew_0160.performance:
 
 - Fixed a performance regression for ``.loc`` indexing with an array or list-like (:issue:`9126`:).
+- Improved the performance of ``DataFrame.to_json`` by 30x for mixed dtype frames (:issue:`9037`)
 - Performance improvements in ``MultiIndex.duplicated`` by working with labels instead of values (:issue:`9125`)
 - Improved the speed of `nunique` by calling `unique` instead of `value_counts` (:issue:`9129`, :issue:`7771`)
 
-
 Bug Fixes
 ~~~~~~~~~
 

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -2160,20 +2160,12 @@ def as_blocks(self):
         Convert the frame to a dict of dtype -> Constructor Types that each has
         a homogeneous dtype.
 
-        are presented in sorted order unless a specific list of columns is
-        provided.
-
         NOTE: the dtypes of the blocks WILL BE PRESERVED HERE (unlike in
               as_matrix)
 
-        Parameters
-        ----------
-        columns : array-like
-            Specific column order
-
         Returns
         -------
-        values : a list of Object
+        values : a dict of dtype -> Constructor Types
         """
         self._consolidate_inplace()
 

diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py
@@ -1,5 +1,5 @@
 # pylint: disable-msg=W0612,E1101
-from pandas.compat import range, lrange, StringIO
+from pandas.compat import range, lrange, StringIO, OrderedDict
 from pandas import compat
 import os
 
@@ -337,14 +337,44 @@ def test_v12_compat(self):
 
         v12_json = os.path.join(self.dirpath, 'tsframe_v012.json')
         df_unser = pd.read_json(v12_json)
-        df_unser = pd.read_json(v12_json)
         assert_frame_equal(df, df_unser)
 
         df_iso = df.drop(['modified'], axis=1)
         v12_iso_json = os.path.join(self.dirpath, 'tsframe_iso_v012.json')
         df_unser_iso = pd.read_json(v12_iso_json)
         assert_frame_equal(df_iso, df_unser_iso)
 
+    def test_blocks_compat_GH9037(self):
+        index = pd.date_range('20000101', periods=10, freq='H')
+        df_mixed = DataFrame(OrderedDict(
+            float_1=[-0.92077639, 0.77434435, 1.25234727, 0.61485564,
+                     -0.60316077, 0.24653374, 0.28668979, -2.51969012,
+                     0.95748401, -1.02970536],
+            int_1=[19680418, 75337055, 99973684, 65103179, 79373900,
+                   40314334, 21290235,  4991321, 41903419, 16008365],
+            str_1=['78c608f1', '64a99743', '13d2ff52', 'ca7f4af2', '97236474',
+                   'bde7e214', '1a6bde47', 'b1190be5', '7a669144', '8d64d068'],
+            float_2=[-0.0428278, -1.80872357,  3.36042349, -0.7573685,
+                     -0.48217572, 0.86229683, 1.08935819, 0.93898739,
+                     -0.03030452, 1.43366348],
+            str_2=['14f04af9', 'd085da90', '4bcfac83', '81504caf', '2ffef4a9',
+                   '08e2f5c4', '07e1af03', 'addbd4a7', '1f6a09ba', '4bfc4d87'],
+            int_2=[86967717, 98098830, 51927505, 20372254, 12601730, 20884027,
+                   34193846, 10561746, 24867120, 76131025]
+        ), index=index)
+
+        # JSON deserialisation always creates unicode strings
+        df_mixed.columns = df_mixed.columns.astype('unicode')
+
+        df_roundtrip = pd.read_json(df_mixed.to_json(orient='split'),
+                                    orient='split')
+        assert_frame_equal(df_mixed, df_roundtrip,
+                           check_index_type=True,
+                           check_column_type=True,
+                           check_frame_type=True,
+                           by_blocks=True,
+                           check_exact=True)
+
     def test_series_non_unique_index(self):
         s = Series(['a', 'b'], index=[1, 1])
 

diff --git a/pandas/src/datetime_helper.h b/pandas/src/datetime_helper.h
@@ -13,8 +13,11 @@ void mangle_nat(PyObject *val) {
 }
 
 npy_int64 get_long_attr(PyObject *o, const char *attr) {
+  npy_int64 long_val;
   PyObject *value = PyObject_GetAttrString(o, attr);
-  return PyLong_Check(value) ? PyLong_AsLongLong(value) : PyInt_AS_LONG(value);
+  long_val = (PyLong_Check(value) ? PyLong_AsLongLong(value) : PyInt_AS_LONG(value));
+  Py_DECREF(value);
+  return long_val;
 }
 
 npy_float64 total_seconds(PyObject *td) {