BUG: support dtypes in column_dtypes for to_records() (pandas-dev#24895)

qwhelan · Pingviinituutti · commit f108ec48e5ff · 2019-02-28T10:19:16.000+02:00
diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -187,7 +187,7 @@ Reshaping
 ^^^^^^^^^
 
 - Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`)
--
+- :meth:`DataFrame.to_records` now accepts ``numpy.dtype`` instances for its ``column_dtypes`` parameter (:issue:`24895`)
 -
 
 
@@ -213,4 +213,3 @@ Contributors
 ~~~~~~~~~~~~
 
 .. contributors:: v0.24.x..HEAD
-
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1719,7 +1719,8 @@ def to_records(self, index=True, convert_datetime64=None,
             # string naming a type.
             if dtype_mapping is None:
                 formats.append(v.dtype)
-            elif isinstance(dtype_mapping, (type, compat.string_types)):
+            elif isinstance(dtype_mapping, (type, np.dtype,
+                                            compat.string_types)):
                 formats.append(dtype_mapping)
             else:
                 element = "row" if i < index_len else "column"
diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py
@@ -10,7 +10,9 @@
 
 from pandas.compat import long
 
-from pandas import DataFrame, MultiIndex, Series, Timestamp, compat, date_range
+from pandas import (
+    CategoricalDtype, DataFrame, MultiIndex, Series, Timestamp, compat,
+    date_range)
 from pandas.tests.frame.common import TestData
 import pandas.util.testing as tm
 
@@ -220,6 +222,12 @@ def test_to_records_with_categorical(self):
                       dtype=[("index", "<i8"), ("A", "<U"),
                              ("B", "<U"), ("C", "<U")])),
 
+        # Pass in a dtype instance.
+        (dict(column_dtypes=np.dtype('unicode')),
+         np.rec.array([("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
+                      dtype=[("index", "<i8"), ("A", "<U"),
+                             ("B", "<U"), ("C", "<U")])),
+
         # Pass in a dictionary (name-only).
         (dict(column_dtypes={"A": np.int8, "B": np.float32, "C": "<U2"}),
          np.rec.array([("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
@@ -249,6 +257,12 @@ def test_to_records_with_categorical(self):
                       dtype=[("index", "<i8"), ("A", "i1"),
                              ("B", "<f4"), ("C", "O")])),
 
+        # Names / indices not in dtype mapping default to array dtype.
+        (dict(column_dtypes={"A": np.dtype('int8'), "B": np.dtype('float32')}),
+         np.rec.array([("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
+                      dtype=[("index", "<i8"), ("A", "i1"),
+                             ("B", "<f4"), ("C", "O")])),
+
         # Mixture of everything.
         (dict(column_dtypes={"A": np.int8, "B": np.float32},
               index_dtypes="<U2"),
@@ -258,17 +272,26 @@ def test_to_records_with_categorical(self):
 
         # Invalid dtype values.
         (dict(index=False, column_dtypes=list()),
-         "Invalid dtype \\[\\] specified for column A"),
+         (ValueError, "Invalid dtype \\[\\] specified for column A")),
 
         (dict(index=False, column_dtypes={"A": "int32", "B": 5}),
-         "Invalid dtype 5 specified for column B"),
+         (ValueError, "Invalid dtype 5 specified for column B")),
+
+        # Numpy can't handle EA types, so check error is raised
+        (dict(index=False, column_dtypes={"A": "int32",
+                                          "B": CategoricalDtype(['a', 'b'])}),
+         (ValueError, 'Invalid dtype category specified for column B')),
+
+        # Check that bad types raise
+        (dict(index=False, column_dtypes={"A": "int32", "B": "foo"}),
+         (TypeError, 'data type "foo" not understood')),
     ])
     def test_to_records_dtype(self, kwargs, expected):
         # see gh-18146
         df = DataFrame({"A": [1, 2], "B": [0.2, 1.5], "C": ["a", "bc"]})
 
-        if isinstance(expected, str):
-            with pytest.raises(ValueError, match=expected):
+        if not isinstance(expected, np.recarray):
+            with pytest.raises(expected[0], match=expected[1]):
                 df.to_records(**kwargs)
         else:
             result = df.to_records(**kwargs)

Original file line number	Diff line number	Diff line change
`@@ -187,7 +187,7 @@ Reshaping`
`187`	`187`	`^^^^^^^^^`
`188`	`188`
`189`	`189`	- Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`)
`190`		`--`
	`190`	+- :meth:`DataFrame.to_records` now accepts ``numpy.dtype`` instances for its ``column_dtypes`` parameter (:issue:`24895`)
`191`	`191`	`-`
`192`	`192`
`193`	`193`
`@@ -213,4 +213,3 @@ Contributors`
`213`	`213`	`~~~~~~~~~~~~`
`214`	`214`
`215`	`215`	`.. contributors:: v0.24.x..HEAD`
`216`		`-`