diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 48eff0543ad4d..3a7c7e798ed88 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -175,7 +175,7 @@ Deprecations - Lookups on a :class:`Series` with a single-item list containing a slice (e.g. ``ser[[slice(0, 4)]]``) are deprecated, will raise in a future version. Either convert the list to tuple, or pass the slice directly instead (:issue:`31333`) - :meth:`DataFrame.mean` and :meth:`DataFrame.median` with ``numeric_only=None`` will include datetime64 and datetime64tz columns in a future version (:issue:`29941`) - Setting values with ``.loc`` using a positional slice is deprecated and will raise in a future version. Use ``.loc`` with labels or ``.iloc`` with positions instead (:issue:`31840`) -- +- Passing short names for ``orient`` to :meth:`DataFrame.to_dict` is deprecated; only the full names (``'dict'``, ``'list'``, ``'series'``, ``'split'``, ``'records'``, ``'index'``) will be accepted in a future version (:issue:`32515`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b0909e23b44c5..204c916c88fa0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1401,11 +1401,45 @@ def to_dict(self, orient="dict", into=dict): ) # GH16122 into_c = com.standardize_mapping(into) - if orient.lower().startswith("d"): + + orient = orient.lower() + # GH32515 + if orient.startswith(("d", "l", "s", "r", "i")) and orient not in { + "dict", + "list", + "series", + "split", + "records", + "index", + }: + warnings.warn( + "Using short name for 'orient' is deprecated. Only the " + "options: ('dict', list, 'series', 'split', 'records', 'index') " + "will be used in a future version. 
Use one of the above " + "to silence this warning.", + FutureWarning, + ) + + if orient.startswith("d"): + orient = "dict" + elif orient.startswith("l"): + orient = "list" + elif orient.startswith("sp"): + orient = "split" + elif orient.startswith("s"): + orient = "series" + elif orient.startswith("r"): + orient = "records" + elif orient.startswith("i"): + orient = "index" + + if orient == "dict": return into_c((k, v.to_dict(into)) for k, v in self.items()) - elif orient.lower().startswith("l"): + + elif orient == "list": return into_c((k, v.tolist()) for k, v in self.items()) - elif orient.lower().startswith("sp"): + + elif orient == "split": return into_c( ( ("index", self.index.tolist()), @@ -1419,9 +1453,11 @@ def to_dict(self, orient="dict", into=dict): ), ) ) - elif orient.lower().startswith("s"): + + elif orient == "series": return into_c((k, com.maybe_box_datetimelike(v)) for k, v in self.items()) - elif orient.lower().startswith("r"): + + elif orient == "records": columns = self.columns.tolist() rows = ( dict(zip(columns, row)) @@ -1431,13 +1467,15 @@ def to_dict(self, orient="dict", into=dict): into_c((k, com.maybe_box_datetimelike(v)) for k, v in row.items()) for row in rows ] - elif orient.lower().startswith("i"): + + elif orient == "index": if not self.index.is_unique: raise ValueError("DataFrame index must be unique for orient='index'.") return into_c( (t[0], dict(zip(self.columns, t[1:]))) for t in self.itertuples(name=None) ) + else: raise ValueError(f"orient '{orient}' not understood") diff --git a/pandas/tests/frame/methods/test_to_dict.py b/pandas/tests/frame/methods/test_to_dict.py index cd9bd169322fd..f1656b46cf356 100644 --- a/pandas/tests/frame/methods/test_to_dict.py +++ b/pandas/tests/frame/methods/test_to_dict.py @@ -70,8 +70,17 @@ def test_to_dict_invalid_orient(self): with pytest.raises(ValueError, match=msg): df.to_dict(orient="xinvalid") + @pytest.mark.parametrize("orient", ["d", "l", "r", "sp", "s", "i"]) + def 
test_to_dict_short_orient_warns(self, orient): + # GH#32515 + df = DataFrame({"A": [0, 1]}) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + df.to_dict(orient=orient) + @pytest.mark.parametrize("mapping", [dict, defaultdict(list), OrderedDict]) def test_to_dict(self, mapping): + # orient= should only take the listed options + # see GH#32515 test_data = {"A": {"1": 1, "2": 2}, "B": {"1": "1", "2": "2", "3": "3"}} # GH#16122 @@ -81,19 +90,19 @@ def test_to_dict(self, mapping): for k2, v2 in v.items(): assert v2 == recons_data[k][k2] - recons_data = DataFrame(test_data).to_dict("l", mapping) + recons_data = DataFrame(test_data).to_dict("list", mapping) for k, v in test_data.items(): for k2, v2 in v.items(): assert v2 == recons_data[k][int(k2) - 1] - recons_data = DataFrame(test_data).to_dict("s", mapping) + recons_data = DataFrame(test_data).to_dict("series", mapping) for k, v in test_data.items(): for k2, v2 in v.items(): assert v2 == recons_data[k][k2] - recons_data = DataFrame(test_data).to_dict("sp", mapping) + recons_data = DataFrame(test_data).to_dict("split", mapping) expected_split = { "columns": ["A", "B"], "index": ["1", "2", "3"], @@ -101,7 +110,7 @@ def test_to_dict(self, mapping): } tm.assert_dict_equal(recons_data, expected_split) - recons_data = DataFrame(test_data).to_dict("r", mapping) + recons_data = DataFrame(test_data).to_dict("records", mapping) expected_records = [ {"A": 1.0, "B": "1"}, {"A": 2.0, "B": "2"}, @@ -113,7 +122,7 @@ def test_to_dict(self, mapping): tm.assert_dict_equal(l, r) # GH#10844 - recons_data = DataFrame(test_data).to_dict("i") + recons_data = DataFrame(test_data).to_dict("index") for k, v in test_data.items(): for k2, v2 in v.items(): @@ -121,7 +130,7 @@ def test_to_dict(self, mapping): df = DataFrame(test_data) df["duped"] = df[df.columns[0]] - recons_data = df.to_dict("i") + recons_data = df.to_dict("index") comp_data = test_data.copy() comp_data["duped"] = comp_data[df.columns[0]] for k, v in 
comp_data.items():