Skip to content

Commit 6620dc6

Browse files
authored
ENH: Add support for dataclasses in the DataFrame constructor (pandas-dev#27999)
1 parent 2b34275 commit 6620dc6

File tree

6 files changed

+131
-1
lines changed

6 files changed

+131
-1
lines changed

doc/source/user_guide/dsintro.rst

+22
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,28 @@ The result will be a DataFrame with the same index as the input Series, and
397397
with one column whose name is the original name of the Series (only if no other
398398
column name provided).
399399

400+
.. _basics.dataframe.from_list_dataclasses:
401+
402+
From a list of dataclasses
403+
~~~~~~~~~~~~~~~~~~~~~~~~~~
404+
405+
.. versionadded:: 1.1.0
406+
407+
Data Classes, as introduced in `PEP557 <https://www.python.org/dev/peps/pep-0557>`__,
408+
can be passed into the DataFrame constructor.
409+
Passing a list of dataclasses is equivalent to passing a list of dictionaries.
410+
411+
Please be aware that all values in the list should be dataclasses; mixing
412+
types in the list would result in a TypeError.
413+
414+
.. ipython:: python
415+
416+
from dataclasses import make_dataclass
417+
418+
Point = make_dataclass("Point", [("x", int), ("y", int)])
419+
420+
pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])
421+
400422
**Missing data**
401423

402424
Much more will be said on this topic in the :ref:`Missing data <missing_data>`

pandas/core/dtypes/common.py

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
is_array_like,
2525
is_bool,
2626
is_complex,
27+
is_dataclass,
2728
is_decimal,
2829
is_dict_like,
2930
is_file_like,

pandas/core/dtypes/inference.py

+36
Original file line numberDiff line numberDiff line change
@@ -386,3 +386,39 @@ def is_sequence(obj) -> bool:
386386
return not isinstance(obj, (str, bytes))
387387
except (TypeError, AttributeError):
388388
return False
389+
390+
391+
def is_dataclass(item) -> bool:
    """
    Check whether the object is a data-class instance.

    Parameters
    ----------
    item : object
        Object to check.

    Returns
    -------
    bool
        True if the item is an instance of a data-class. False otherwise,
        including when the data class itself (rather than an instance) is
        passed, or when the ``dataclasses`` module cannot be imported.

    Examples
    --------
    >>> from dataclasses import dataclass
    >>> @dataclass
    ... class Point:
    ...     x: int
    ...     y: int

    >>> is_dataclass(Point)
    False
    >>> is_dataclass(Point(0, 2))
    True
    """
    # Only the import is guarded: if ``dataclasses`` is unavailable, nothing
    # can be a dataclass instance. Importing the module (instead of the
    # ``is_dataclass`` name) avoids shadowing this function inside its body.
    try:
        import dataclasses
    except ImportError:
        return False

    # ``dataclasses.is_dataclass`` is also True for the class object itself,
    # so class objects are filtered out explicitly.
    return dataclasses.is_dataclass(item) and not isinstance(item, type)

pandas/core/frame.py

+4
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
ensure_platform_int,
7878
infer_dtype_from_object,
7979
is_bool_dtype,
80+
is_dataclass,
8081
is_datetime64_any_dtype,
8182
is_dict_like,
8283
is_dtype_equal,
@@ -117,6 +118,7 @@
117118
from pandas.core.internals import BlockManager
118119
from pandas.core.internals.construction import (
119120
arrays_to_mgr,
121+
dataclasses_to_dicts,
120122
get_names_from_index,
121123
init_dict,
122124
init_ndarray,
@@ -474,6 +476,8 @@ def __init__(
474476
if not isinstance(data, (abc.Sequence, ExtensionArray)):
475477
data = list(data)
476478
if len(data) > 0:
479+
if is_dataclass(data[0]):
480+
data = dataclasses_to_dicts(data)
477481
if is_list_like(data[0]) and getattr(data[0], "ndim", 1) == 1:
478482
if is_named_tuple(data[0]) and columns is None:
479483
columns = data[0]._fields

pandas/core/internals/construction.py

+27
Original file line numberDiff line numberDiff line change
@@ -429,6 +429,33 @@ def _get_axes(N, K, index, columns):
429429
return index, columns
430430

431431

432+
def dataclasses_to_dicts(data):
    """
    Convert a list of dataclass instances to a list of dictionaries.

    Parameters
    ----------
    data : list of dataclass instances

    Returns
    -------
    list_dict : List[dict]
        One dictionary per element of ``data``, in the same order.

    Raises
    ------
    TypeError
        If an element of ``data`` is not a dataclass instance
        (raised by ``dataclasses.asdict``).

    Examples
    --------
    >>> from dataclasses import dataclass
    >>> @dataclass
    ... class Point:
    ...     x: int
    ...     y: int

    >>> dataclasses_to_dicts([Point(1, 2), Point(2, 3)])
    [{'x': 1, 'y': 2}, {'x': 2, 'y': 3}]
    """
    # Imported locally, as in the original block, rather than at module
    # import time.
    from dataclasses import asdict

    return [asdict(item) for item in data]
457+
458+
432459
# ---------------------------------------------------------------------
433460
# Conversion of Inputs to Arrays
434461

pandas/tests/frame/test_constructors.py

+41-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import pytest
1010
import pytz
1111

12-
from pandas.compat import is_platform_little_endian
12+
from pandas.compat import PY37, is_platform_little_endian
1313
from pandas.compat.numpy import _is_numpy_dev
1414

1515
from pandas.core.dtypes.common import is_integer_dtype
@@ -1364,6 +1364,46 @@ def test_constructor_list_of_namedtuples(self):
13641364
result = DataFrame(tuples, columns=["y", "z"])
13651365
tm.assert_frame_equal(result, expected)
13661366

1367+
@pytest.mark.skipif(not PY37, reason="Requires Python >= 3.7")
def test_constructor_list_of_dataclasses(self):
    # GH21910
    # A homogeneous list of dataclass instances should produce one column
    # per field and one row per instance.
    import dataclasses

    Point = dataclasses.make_dataclass("Point", [("x", int), ("y", int)])

    points = [Point(0, 3), Point(1, 3)]
    result = DataFrame(points)

    expected = DataFrame({"x": [0, 1], "y": [3, 3]})
    tm.assert_frame_equal(result, expected)
1378+
1379+
@pytest.mark.skipif(not PY37, reason="Requires Python >= 3.7")
def test_constructor_list_of_dataclasses_with_varying_types(self):
    # GH21910
    import dataclasses

    # Two different dataclasses that only share the field "y".
    Point = dataclasses.make_dataclass("Point", [("x", int), ("y", int)])
    HLine = dataclasses.make_dataclass(
        "HLine", [("x0", int), ("x1", int), ("y", int)]
    )

    records = [Point(0, 3), HLine(1, 3, 3)]
    result = DataFrame(records)

    # Fields that a given instance does not define are expected to be NaN.
    expected_columns = {
        "x": [0, np.nan],
        "y": [3, 3],
        "x0": [np.nan, 1],
        "x1": [np.nan, 3],
    }
    expected = DataFrame(expected_columns)
    tm.assert_frame_equal(result, expected)
1395+
1396+
@pytest.mark.skipif(not PY37, reason="Requires Python >= 3.7")
def test_constructor_list_of_dataclasses_error_thrown(self):
    # GH21910
    from dataclasses import make_dataclass

    Point = make_dataclass("Point", [("x", int), ("y", int)])

    # Mixing a dataclass instance with a non-dataclass must raise TypeError.
    # Matching on the message ensures the test does not pass on an
    # unrelated TypeError raised elsewhere in the constructor.
    msg = r"asdict\(\) should be called on dataclass instances"
    with pytest.raises(TypeError, match=msg):
        DataFrame([Point(0, 0), {"x": 1, "y": 0}])
1406+
13671407
def test_constructor_list_of_dict_order(self):
13681408
# GH10056
13691409
data = [

0 commit comments

Comments
 (0)