diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index b7efec8fd2e89..989963ec84db9 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -134,6 +134,28 @@ Previously, negative arguments returned empty frames.
     df.groupby("A").nth(slice(1, -1))
     df.groupby("A").nth([slice(None, 1), slice(-1, None)])
 
+.. _whatsnew_140.dict_tight:
+
+DataFrame.from_dict and DataFrame.to_dict have new ``'tight'`` option
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+A new ``'tight'`` dictionary format that preserves :class:`MultiIndex` entries and names
+is now available with the :meth:`DataFrame.from_dict` and :meth:`DataFrame.to_dict` methods
+and can be used with the standard ``json`` library to produce a tight
+representation of :class:`DataFrame` objects (:issue:`4889`).
+
+.. ipython:: python
+
+    df = pd.DataFrame.from_records(
+        [[1, 3], [2, 4]],
+        index=pd.MultiIndex.from_tuples([("a", "b"), ("a", "c")],
+                                        names=["n1", "n2"]),
+        columns=pd.MultiIndex.from_tuples([("x", 1), ("y", 2)],
+                                          names=["z1", "z2"]),
+    )
+    df
+    df.to_dict(orient='tight')
+
 .. _whatsnew_140.enhancements.other:
 
 Other enhancements
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 1bb3dda0312cd..f9f5c89d4bd4d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1520,15 +1520,21 @@ def from_dict(
         ----------
         data : dict
             Of the form {field : array-like} or {field : dict}.
-        orient : {'columns', 'index'}, default 'columns'
+        orient : {'columns', 'index', 'tight'}, default 'columns'
             The "orientation" of the data. If the keys of the passed dict
             should be the columns of the resulting DataFrame, pass 'columns'
             (default). Otherwise if the keys should be rows, pass 'index'.
+            If 'tight', assume a dict with keys ['index', 'columns', 'data',
+            'index_names', 'column_names'].
+
+            .. versionadded:: 1.4.0
+               'tight' as an allowed value for the ``orient`` argument
+
         dtype : dtype, default None
             Data type to force, otherwise infer.
         columns : list, default None
             Column labels to use when ``orient='index'``. Raises a ValueError
-            if used with ``orient='columns'``.
+            if used with ``orient='columns'`` or ``orient='tight'``.
 
         Returns
         -------
@@ -1539,6 +1545,7 @@ def from_dict(
         DataFrame.from_records : DataFrame from structured ndarray, sequence
             of tuples or dicts, or DataFrame.
         DataFrame : DataFrame object creation using constructor.
+        DataFrame.to_dict : Convert the DataFrame to a dictionary.
 
         Examples
         --------
@@ -1569,6 +1576,21 @@ def from_dict(
                A  B  C  D
         row_1  3  2  1  0
         row_2  a  b  c  d
+
+        Specify ``orient='tight'`` to create the DataFrame using a 'tight'
+        format:
+
+        >>> data = {'index': [('a', 'b'), ('a', 'c')],
+        ...         'columns': [('x', 1), ('y', 2)],
+        ...         'data': [[1, 3], [2, 4]],
+        ...         'index_names': ['n1', 'n2'],
+        ...         'column_names': ['z1', 'z2']}
+        >>> pd.DataFrame.from_dict(data, orient='tight')
+        z1     x  y
+        z2     1  2
+        n1 n2
+        a  b   1  3
+           c   2  4
         """
         index = None
         orient = orient.lower()
@@ -1579,13 +1601,29 @@ def from_dict(
                 data = _from_nested_dict(data)
             else:
                 data, index = list(data.values()), list(data.keys())
-        elif orient == "columns":
+        elif orient == "columns" or orient == "tight":
             if columns is not None:
-                raise ValueError("cannot use columns parameter with orient='columns'")
+                raise ValueError(f"cannot use columns parameter with orient='{orient}'")
         else:  # pragma: no cover
-            raise ValueError("only recognize index or columns for orient")
+            raise ValueError("only recognize index, columns or tight for orient")
 
-        return cls(data, index=index, columns=columns, dtype=dtype)
+        if orient != "tight":
+            return cls(data, index=index, columns=columns, dtype=dtype)
+        else:
+            realdata = data["data"]
+
+            def create_index(indexlist, namelist):
+                """Build an Index, or a MultiIndex if several names are given."""
+                index: Index
+                if len(namelist) > 1:
+                    index = MultiIndex.from_tuples(indexlist, names=namelist)
+                else:
+                    index = Index(indexlist, name=namelist[0])
+                return index
+
+            index = create_index(data["index"], data["index_names"])
+            columns = create_index(data["columns"], data["column_names"])
+            return cls(realdata, index=index, columns=columns, dtype=dtype)
 
     def to_numpy(
         self,
@@ -1675,6 +1713,9 @@ def to_dict(self, orient: str = "dict", into=dict):
             - 'series' : dict like {column -> Series(values)}
             - 'split' : dict like
               {'index' -> [index], 'columns' -> [columns], 'data' -> [values]}
+            - 'tight' : dict like
+              {'index' -> [index], 'columns' -> [columns], 'data' -> [values],
+              'index_names' -> [index.names], 'column_names' -> [columns.names]}
             - 'records' : list like
               [{column -> value}, ... , {column -> value}]
             - 'index' : dict like {index -> {column -> value}}
@@ -1682,6 +1723,9 @@ def to_dict(self, orient: str = "dict", into=dict):
             Abbreviations are allowed. `s` indicates `series` and `sp`
             indicates `split`.
 
+            .. versionadded:: 1.4.0
+                'tight' as an allowed value for the ``orient`` argument
+
         into : class, default dict
             The collections.abc.Mapping subclass used for all Mappings
             in the return value.  Can be the actual class or an empty
@@ -1731,6 +1775,10 @@ def to_dict(self, orient: str = "dict", into=dict):
         >>> df.to_dict('index')
         {'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}}
 
+        >>> df.to_dict('tight')
+        {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
+        'data': [[1, 0.5], [2, 0.75]], 'index_names': [None], 'column_names': [None]}
+
         You can also specify the mapping type.
 
         >>> from collections import OrderedDict, defaultdict
@@ -1807,6 +1855,23 @@ def to_dict(self, orient: str = "dict", into=dict):
                 )
             )
 
+        elif orient == "tight":
+            return into_c(
+                (
+                    ("index", self.index.tolist()),
+                    ("columns", self.columns.tolist()),
+                    (
+                        "data",
+                        [
+                            list(map(maybe_box_native, t))
+                            for t in self.itertuples(index=False, name=None)
+                        ],
+                    ),
+                    ("index_names", list(self.index.names)),
+                    ("column_names", list(self.columns.names)),
+                )
+            )
+
         elif orient == "series":
             return into_c((k, v) for k, v in self.items())
 
diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py
index c33f649206f54..31ea3e582eeb2 100644
--- a/pandas/tests/frame/methods/test_to_dict.py
+++ b/pandas/tests/frame/methods/test_to_dict.py
@@ -10,6 +10,8 @@
 
 from pandas import (
     DataFrame,
+    Index,
+    MultiIndex,
     Series,
     Timestamp,
 )
@@ -312,3 +314,33 @@ def test_to_dict_mixed_numeric_frame(self):
         result = df.reset_index().to_dict("records")
         expected = [{"index": 0, "a": 1.0, "b": 9.0}]
         assert result == expected
+
+    @pytest.mark.parametrize(
+        "index",
+        [
+            None,
+            Index(["aa", "bb"]),
+            Index(["aa", "bb"], name="cc"),
+            MultiIndex.from_tuples([("a", "b"), ("a", "c")]),
+            MultiIndex.from_tuples([("a", "b"), ("a", "c")], names=["n1", "n2"]),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "columns",
+        [
+            ["x", "y"],
+            Index(["x", "y"]),
+            Index(["x", "y"], name="z"),
+            MultiIndex.from_tuples([("x", 1), ("y", 2)]),
+            MultiIndex.from_tuples([("x", 1), ("y", 2)], names=["z1", "z2"]),
+        ],
+    )
+    def test_to_dict_orient_tight(self, index, columns):
+        df = DataFrame.from_records(
+            [[1, 3], [2, 4]],
+            columns=columns,
+            index=index,
+        )
+        roundtrip = DataFrame.from_dict(df.to_dict(orient="tight"), orient="tight")
+
+        tm.assert_frame_equal(df, roundtrip)