Skip to content

Commit 7f2b418

Browse files
authored
ENH: Add orient=tight format for dictionaries (#35292)
1 parent 1e5ab7a commit 7f2b418

File tree

3 files changed

+123
-5
lines changed

3 files changed

+123
-5
lines changed

doc/source/whatsnew/v1.4.0.rst

+22
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,28 @@ Previously, negative arguments returned empty frames.
134134
df.groupby("A").nth(slice(1, -1))
135135
df.groupby("A").nth([slice(None, 1), slice(-1, None)])
136136
137+
.. _whatsnew_140.dict_tight:
138+
139+
DataFrame.from_dict and DataFrame.to_dict have new ``'tight'`` option
140+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
141+
142+
A new ``'tight'`` dictionary format that preserves :class:`MultiIndex` entries and names
143+
is now available with the :meth:`DataFrame.from_dict` and :meth:`DataFrame.to_dict` methods
144+
and can be used with the standard ``json`` library to produce a tight
145+
representation of :class:`DataFrame` objects (:issue:`4889`).
146+
147+
.. ipython:: python
148+
149+
df = pd.DataFrame.from_records(
150+
[[1, 3], [2, 4]],
151+
index=pd.MultiIndex.from_tuples([("a", "b"), ("a", "c")],
152+
names=["n1", "n2"]),
153+
columns=pd.MultiIndex.from_tuples([("x", 1), ("y", 2)],
154+
names=["z1", "z2"]),
155+
)
156+
df
157+
df.to_dict(orient='tight')
158+
137159
.. _whatsnew_140.enhancements.other:
138160

139161
Other enhancements

pandas/core/frame.py

+69-5
Original file line numberDiff line numberDiff line change
@@ -1520,15 +1520,21 @@ def from_dict(
15201520
----------
15211521
data : dict
15221522
Of the form {field : array-like} or {field : dict}.
1523-
orient : {'columns', 'index'}, default 'columns'
1523+
orient : {'columns', 'index', 'tight'}, default 'columns'
15241524
The "orientation" of the data. If the keys of the passed dict
15251525
should be the columns of the resulting DataFrame, pass 'columns'
15261526
(default). Otherwise if the keys should be rows, pass 'index'.
1527+
If 'tight', assume a dict with keys ['index', 'columns', 'data',
1528+
'index_names', 'column_names'].
1529+
1530+
.. versionadded:: 1.4.0
1531+
'tight' as an allowed value for the ``orient`` argument
1532+
15271533
dtype : dtype, default None
15281534
Data type to force, otherwise infer.
15291535
columns : list, default None
15301536
Column labels to use when ``orient='index'``. Raises a ValueError
1531-
if used with ``orient='columns'``.
1537+
if used with ``orient='columns'`` or ``orient='tight'``.
15321538
15331539
Returns
15341540
-------
@@ -1539,6 +1545,7 @@ def from_dict(
15391545
DataFrame.from_records : DataFrame from structured ndarray, sequence
15401546
of tuples or dicts, or DataFrame.
15411547
DataFrame : DataFrame object creation using constructor.
1548+
DataFrame.to_dict : Convert the DataFrame to a dictionary.
15421549
15431550
Examples
15441551
--------
@@ -1569,6 +1576,21 @@ def from_dict(
15691576
A B C D
15701577
row_1 3 2 1 0
15711578
row_2 a b c d
1579+
1580+
Specify ``orient='tight'`` to create the DataFrame using a 'tight'
1581+
format:
1582+
1583+
>>> data = {'index': [('a', 'b'), ('a', 'c')],
1584+
... 'columns': [('x', 1), ('y', 2)],
1585+
... 'data': [[1, 3], [2, 4]],
1586+
... 'index_names': ['n1', 'n2'],
1587+
... 'column_names': ['z1', 'z2']}
1588+
>>> pd.DataFrame.from_dict(data, orient='tight')
1589+
z1 x y
1590+
z2 1 2
1591+
n1 n2
1592+
a b 1 3
1593+
c 2 4
15721594
"""
15731595
index = None
15741596
orient = orient.lower()
@@ -1579,13 +1601,28 @@ def from_dict(
15791601
data = _from_nested_dict(data)
15801602
else:
15811603
data, index = list(data.values()), list(data.keys())
1582-
elif orient == "columns":
1604+
elif orient == "columns" or orient == "tight":
15831605
if columns is not None:
1584-
raise ValueError("cannot use columns parameter with orient='columns'")
1606+
raise ValueError(f"cannot use columns parameter with orient='{orient}'")
15851607
else: # pragma: no cover
15861608
raise ValueError("only recognize index or columns for orient")
15871609

1588-
return cls(data, index=index, columns=columns, dtype=dtype)
1610+
if orient != "tight":
1611+
return cls(data, index=index, columns=columns, dtype=dtype)
1612+
else:
1613+
realdata = data["data"]
1614+
1615+
def create_index(indexlist, namelist):
1616+
index: Index
1617+
if len(namelist) > 1:
1618+
index = MultiIndex.from_tuples(indexlist, names=namelist)
1619+
else:
1620+
index = Index(indexlist, name=namelist[0])
1621+
return index
1622+
1623+
index = create_index(data["index"], data["index_names"])
1624+
columns = create_index(data["columns"], data["column_names"])
1625+
return cls(realdata, index=index, columns=columns, dtype=dtype)
15891626

15901627
def to_numpy(
15911628
self,
@@ -1675,13 +1712,19 @@ def to_dict(self, orient: str = "dict", into=dict):
16751712
- 'series' : dict like {column -> Series(values)}
16761713
- 'split' : dict like
16771714
{'index' -> [index], 'columns' -> [columns], 'data' -> [values]}
1715+
- 'tight' : dict like
1716+
{'index' -> [index], 'columns' -> [columns], 'data' -> [values],
1717+
'index_names' -> [index.names], 'column_names' -> [columns.names]}
16781718
- 'records' : list like
16791719
[{column -> value}, ... , {column -> value}]
16801720
- 'index' : dict like {index -> {column -> value}}
16811721
16821722
Abbreviations are allowed. `s` indicates `series` and `sp`
16831723
indicates `split`.
16841724
1725+
.. versionadded:: 1.4.0
1726+
'tight' as an allowed value for the ``orient`` argument
1727+
16851728
into : class, default dict
16861729
The collections.abc.Mapping subclass used for all Mappings
16871730
in the return value. Can be the actual class or an empty
@@ -1731,6 +1774,10 @@ def to_dict(self, orient: str = "dict", into=dict):
17311774
>>> df.to_dict('index')
17321775
{'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}}
17331776
1777+
>>> df.to_dict('tight')
1778+
{'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
1779+
'data': [[1, 0.5], [2, 0.75]], 'index_names': [None], 'column_names': [None]}
1780+
17341781
You can also specify the mapping type.
17351782
17361783
>>> from collections import OrderedDict, defaultdict
@@ -1807,6 +1854,23 @@ def to_dict(self, orient: str = "dict", into=dict):
18071854
)
18081855
)
18091856

1857+
elif orient == "tight":
1858+
return into_c(
1859+
(
1860+
("index", self.index.tolist()),
1861+
("columns", self.columns.tolist()),
1862+
(
1863+
"data",
1864+
[
1865+
list(map(maybe_box_native, t))
1866+
for t in self.itertuples(index=False, name=None)
1867+
],
1868+
),
1869+
("index_names", list(self.index.names)),
1870+
("column_names", list(self.columns.names)),
1871+
)
1872+
)
1873+
18101874
elif orient == "series":
18111875
return into_c((k, v) for k, v in self.items())
18121876

pandas/tests/frame/methods/test_to_dict.py

+32
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010

1111
from pandas import (
1212
DataFrame,
13+
Index,
14+
MultiIndex,
1315
Series,
1416
Timestamp,
1517
)
@@ -312,3 +314,33 @@ def test_to_dict_mixed_numeric_frame(self):
312314
result = df.reset_index().to_dict("records")
313315
expected = [{"index": 0, "a": 1.0, "b": 9.0}]
314316
assert result == expected
317+
318+
@pytest.mark.parametrize(
319+
"index",
320+
[
321+
None,
322+
Index(["aa", "bb"]),
323+
Index(["aa", "bb"], name="cc"),
324+
MultiIndex.from_tuples([("a", "b"), ("a", "c")]),
325+
MultiIndex.from_tuples([("a", "b"), ("a", "c")], names=["n1", "n2"]),
326+
],
327+
)
328+
@pytest.mark.parametrize(
329+
"columns",
330+
[
331+
["x", "y"],
332+
Index(["x", "y"]),
333+
Index(["x", "y"], name="z"),
334+
MultiIndex.from_tuples([("x", 1), ("y", 2)]),
335+
MultiIndex.from_tuples([("x", 1), ("y", 2)], names=["z1", "z2"]),
336+
],
337+
)
338+
def test_to_dict_orient_tight(self, index, columns):
339+
df = DataFrame.from_records(
340+
[[1, 3], [2, 4]],
341+
columns=columns,
342+
index=index,
343+
)
344+
roundtrip = DataFrame.from_dict(df.to_dict(orient="tight"), orient="tight")
345+
346+
tm.assert_frame_equal(df, roundtrip)

0 commit comments

Comments
 (0)