Skip to content

Commit f108ec4

Browse files
qwhelan authored and Pingviinituutti committed
BUG: support dtypes in column_dtypes for to_records() (pandas-dev#24895)
1 parent dc248e7 commit f108ec4

File tree

3 files changed

+31
-8
lines changed

3 files changed

+31
-8
lines changed

doc/source/whatsnew/v0.25.0.rst

+1-2
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ Reshaping
187187
^^^^^^^^^
188188

189189
- Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`)
190-
-
190+
- :meth:`DataFrame.to_records` now accepts ``np.dtype`` instances for its ``column_dtypes`` parameter (:issue:`24895`)
191191
-
192192

193193

@@ -213,4 +213,3 @@ Contributors
213213
~~~~~~~~~~~~
214214

215215
.. contributors:: v0.24.x..HEAD
216-

pandas/core/frame.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1719,7 +1719,8 @@ def to_records(self, index=True, convert_datetime64=None,
17191719
# string naming a type.
17201720
if dtype_mapping is None:
17211721
formats.append(v.dtype)
1722-
elif isinstance(dtype_mapping, (type, compat.string_types)):
1722+
elif isinstance(dtype_mapping, (type, np.dtype,
1723+
compat.string_types)):
17231724
formats.append(dtype_mapping)
17241725
else:
17251726
element = "row" if i < index_len else "column"

pandas/tests/frame/test_convert_to.py

+28-5
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010

1111
from pandas.compat import long
1212

13-
from pandas import DataFrame, MultiIndex, Series, Timestamp, compat, date_range
13+
from pandas import (
14+
CategoricalDtype, DataFrame, MultiIndex, Series, Timestamp, compat,
15+
date_range)
1416
from pandas.tests.frame.common import TestData
1517
import pandas.util.testing as tm
1618

@@ -220,6 +222,12 @@ def test_to_records_with_categorical(self):
220222
dtype=[("index", "<i8"), ("A", "<U"),
221223
("B", "<U"), ("C", "<U")])),
222224
225+
# Pass in a dtype instance.
226+
(dict(column_dtypes=np.dtype('unicode')),
227+
np.rec.array([("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
228+
dtype=[("index", "<i8"), ("A", "<U"),
229+
("B", "<U"), ("C", "<U")])),
230+
223231
# Pass in a dictionary (name-only).
224232
(dict(column_dtypes={"A": np.int8, "B": np.float32, "C": "<U2"}),
225233
np.rec.array([("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
@@ -249,6 +257,12 @@ def test_to_records_with_categorical(self):
249257
dtype=[("index", "<i8"), ("A", "i1"),
250258
("B", "<f4"), ("C", "O")])),
251259
260+
# Names / indices not in dtype mapping default to array dtype.
261+
(dict(column_dtypes={"A": np.dtype('int8'), "B": np.dtype('float32')}),
262+
np.rec.array([("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
263+
dtype=[("index", "<i8"), ("A", "i1"),
264+
("B", "<f4"), ("C", "O")])),
265+
252266
# Mixture of everything.
253267
(dict(column_dtypes={"A": np.int8, "B": np.float32},
254268
index_dtypes="<U2"),
@@ -258,17 +272,26 @@ def test_to_records_with_categorical(self):
258272
259273
# Invalid dtype values.
260274
(dict(index=False, column_dtypes=list()),
261-
"Invalid dtype \\[\\] specified for column A"),
275+
(ValueError, "Invalid dtype \\[\\] specified for column A")),
262276
263277
(dict(index=False, column_dtypes={"A": "int32", "B": 5}),
264-
"Invalid dtype 5 specified for column B"),
278+
(ValueError, "Invalid dtype 5 specified for column B")),
279+
280+
# NumPy can't handle extension array (EA) dtypes, so check that an error is raised.
281+
(dict(index=False, column_dtypes={"A": "int32",
282+
"B": CategoricalDtype(['a', 'b'])}),
283+
(ValueError, 'Invalid dtype category specified for column B')),
284+
285+
# Check that bad types raise
286+
(dict(index=False, column_dtypes={"A": "int32", "B": "foo"}),
287+
(TypeError, 'data type "foo" not understood')),
265288
])
266289
def test_to_records_dtype(self, kwargs, expected):
267290
# see gh-18146
268291
df = DataFrame({"A": [1, 2], "B": [0.2, 1.5], "C": ["a", "bc"]})
269292

270-
if isinstance(expected, str):
271-
with pytest.raises(ValueError, match=expected):
293+
if not isinstance(expected, np.recarray):
294+
with pytest.raises(expected[0], match=expected[1]):
272295
df.to_records(**kwargs)
273296
else:
274297
result = df.to_records(**kwargs)

0 commit comments

Comments
 (0)