Skip to content

Commit a0a6e04

Browse files
TYP: Correct type annotation for to_dict. (#55130)
* Correct type annotation for to_dict.

  The `into` argument of DataFrame.to_dict and Series.to_dict can be either a dict class or an instance of one; this is covariant, so subclasses of dict can also be used. The argument was annotated as `type[dict]`, though, so type checkers flagged passing an initialized object (which is required for collections.defaultdict) as an incorrect argument type.

  Fix by annotating `into` to accept either a subclass of dict or an initialized instance of a subclass of dict.

* Use generic MutableMapping type for to_dict method.

  Unfortunately, a generic type annotation with a default triggers an existing mypy limitation (python/mypy#3737). The current workaround is to use overloads and then not annotate the implementation containing the default parameter; this still enables mypy to deduce the correct return types.

  Two overloads are added for Series.to_dict, even though they could be combined using a Union type, because at least two overloads are required for a single method.

* Fix formatting
* Return annotation for non-overload
* No keyword should return dict
* Swap overload order to work for dict subclasses that are passed as keywords
* Fix tests

---------

Co-authored-by: Torsten Wörtwein <[email protected]>
1 parent 9282d9f commit a0a6e04

File tree

5 files changed

+123
-23
lines changed

5 files changed

+123
-23
lines changed

pandas/_typing.py

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
Hashable,
55
Iterator,
66
Mapping,
7+
MutableMapping,
78
Sequence,
89
)
910
from datetime import (
@@ -103,6 +104,7 @@
103104
TypeGuard: Any = None
104105

105106
HashableT = TypeVar("HashableT", bound=Hashable)
107+
MutableMappingT = TypeVar("MutableMappingT", bound=MutableMapping)
106108

107109
# array-like
108110

pandas/core/frame.py

+34-7
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,7 @@
230230
Level,
231231
MergeHow,
232232
MergeValidate,
233+
MutableMappingT,
233234
NaAction,
234235
NaPosition,
235236
NsmallestNlargestKeep,
@@ -1927,6 +1928,27 @@ def _create_data_for_split_and_tight_to_dict(
19271928
def to_dict(
19281929
self,
19291930
orient: Literal["dict", "list", "series", "split", "tight", "index"] = ...,
1931+
*,
1932+
into: type[MutableMappingT] | MutableMappingT,
1933+
index: bool = ...,
1934+
) -> MutableMappingT:
1935+
...
1936+
1937+
@overload
1938+
def to_dict(
1939+
self,
1940+
orient: Literal["records"],
1941+
*,
1942+
into: type[MutableMappingT] | MutableMappingT,
1943+
index: bool = ...,
1944+
) -> list[MutableMappingT]:
1945+
...
1946+
1947+
@overload
1948+
def to_dict(
1949+
self,
1950+
orient: Literal["dict", "list", "series", "split", "tight", "index"] = ...,
1951+
*,
19301952
into: type[dict] = ...,
19311953
index: bool = ...,
19321954
) -> dict:
@@ -1936,11 +1958,14 @@ def to_dict(
19361958
def to_dict(
19371959
self,
19381960
orient: Literal["records"],
1961+
*,
19391962
into: type[dict] = ...,
19401963
index: bool = ...,
19411964
) -> list[dict]:
19421965
...
19431966

1967+
# error: Incompatible default for argument "into" (default has type "type
1968+
# [dict[Any, Any]]", argument has type "type[MutableMappingT] | MutableMappingT")
19441969
@deprecate_nonkeyword_arguments(
19451970
version="3.0", allowed_args=["self", "orient"], name="to_dict"
19461971
)
@@ -1949,9 +1974,10 @@ def to_dict(
19491974
orient: Literal[
19501975
"dict", "list", "series", "split", "tight", "records", "index"
19511976
] = "dict",
1952-
into: type[dict] = dict,
1977+
into: type[MutableMappingT]
1978+
| MutableMappingT = dict, # type: ignore[assignment]
19531979
index: bool = True,
1954-
) -> dict | list[dict]:
1980+
) -> MutableMappingT | list[MutableMappingT]:
19551981
"""
19561982
Convert the DataFrame to a dictionary.
19571983
@@ -1979,7 +2005,7 @@ def to_dict(
19792005
'tight' as an allowed value for the ``orient`` argument
19802006
19812007
into : class, default dict
1982-
The collections.abc.Mapping subclass used for all Mappings
2008+
The collections.abc.MutableMapping subclass used for all Mappings
19832009
in the return value. Can be the actual class or an empty
19842010
instance of the mapping type you want. If you want a
19852011
collections.defaultdict, you must pass it initialized.
@@ -1993,9 +2019,10 @@ def to_dict(
19932019
19942020
Returns
19952021
-------
1996-
dict, list or collections.abc.Mapping
1997-
Return a collections.abc.Mapping object representing the DataFrame.
1998-
The resulting transformation depends on the `orient` parameter.
2022+
dict, list or collections.abc.MutableMapping
2023+
Return a collections.abc.MutableMapping object representing the
2024+
DataFrame. The resulting transformation depends on the `orient`
2025+
parameter.
19992026
20002027
See Also
20012028
--------
@@ -2054,7 +2081,7 @@ def to_dict(
20542081
"""
20552082
from pandas.core.methods.to_dict import to_dict
20562083

2057-
return to_dict(self, orient, into, index)
2084+
return to_dict(self, orient, into=into, index=index)
20582085

20592086
@deprecate_nonkeyword_arguments(
20602087
version="3.0", allowed_args=["self", "destination_table"], name="to_gbq"

pandas/core/methods/to_dict.py

+56-6
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import (
44
TYPE_CHECKING,
55
Literal,
6+
overload,
67
)
78
import warnings
89

@@ -16,17 +17,66 @@
1617
from pandas.core import common as com
1718

1819
if TYPE_CHECKING:
20+
from pandas._typing import MutableMappingT
21+
1922
from pandas import DataFrame
2023

2124

25+
@overload
26+
def to_dict(
27+
df: DataFrame,
28+
orient: Literal["dict", "list", "series", "split", "tight", "index"] = ...,
29+
*,
30+
into: type[MutableMappingT] | MutableMappingT,
31+
index: bool = ...,
32+
) -> MutableMappingT:
33+
...
34+
35+
36+
@overload
37+
def to_dict(
38+
df: DataFrame,
39+
orient: Literal["records"],
40+
*,
41+
into: type[MutableMappingT] | MutableMappingT,
42+
index: bool = ...,
43+
) -> list[MutableMappingT]:
44+
...
45+
46+
47+
@overload
48+
def to_dict(
49+
df: DataFrame,
50+
orient: Literal["dict", "list", "series", "split", "tight", "index"] = ...,
51+
*,
52+
into: type[dict] = ...,
53+
index: bool = ...,
54+
) -> dict:
55+
...
56+
57+
58+
@overload
59+
def to_dict(
60+
df: DataFrame,
61+
orient: Literal["records"],
62+
*,
63+
into: type[dict] = ...,
64+
index: bool = ...,
65+
) -> list[dict]:
66+
...
67+
68+
69+
# error: Incompatible default for argument "into" (default has type "type[dict
70+
# [Any, Any]]", argument has type "type[MutableMappingT] | MutableMappingT")
2271
def to_dict(
2372
df: DataFrame,
2473
orient: Literal[
2574
"dict", "list", "series", "split", "tight", "records", "index"
2675
] = "dict",
27-
into: type[dict] = dict,
76+
*,
77+
into: type[MutableMappingT] | MutableMappingT = dict, # type: ignore[assignment]
2878
index: bool = True,
29-
) -> dict | list[dict]:
79+
) -> MutableMappingT | list[MutableMappingT]:
3080
"""
3181
Convert the DataFrame to a dictionary.
3282
@@ -54,7 +104,7 @@ def to_dict(
54104
'tight' as an allowed value for the ``orient`` argument
55105
56106
into : class, default dict
57-
The collections.abc.Mapping subclass used for all Mappings
107+
The collections.abc.MutableMapping subclass used for all Mappings
58108
in the return value. Can be the actual class or an empty
59109
instance of the mapping type you want. If you want a
60110
collections.defaultdict, you must pass it initialized.
@@ -69,8 +119,8 @@ def to_dict(
69119
Returns
70120
-------
71121
dict, list or collections.abc.MutableMapping
72-
Return a collections.abc.Mapping object representing the DataFrame.
73-
The resulting transformation depends on the `orient` parameter.
122+
Return a collections.abc.MutableMapping object representing the
123+
DataFrame. The resulting transformation depends on the `orient` parameter.
74124
"""
75125
if not df.columns.is_unique:
76126
warnings.warn(
@@ -103,7 +153,7 @@ def to_dict(
103153
are_all_object_dtype_cols = len(box_native_indices) == len(df.dtypes)
104154

105155
if orient == "dict":
106-
return into_c((k, v.to_dict(into)) for k, v in df.items())
156+
return into_c((k, v.to_dict(into=into)) for k, v in df.items())
107157

108158
elif orient == "list":
109159
object_dtype_indices_as_set: set[int] = set(box_native_indices)

pandas/core/series.py

+29-8
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
from pandas.util._decorators import (
4949
Appender,
5050
Substitution,
51+
deprecate_nonkeyword_arguments,
5152
doc,
5253
)
5354
from pandas.util._exceptions import find_stack_level
@@ -167,6 +168,7 @@
167168
IndexKeyFunc,
168169
IndexLabel,
169170
Level,
171+
MutableMappingT,
170172
NaPosition,
171173
NumpySorter,
172174
NumpyValueArrayLike,
@@ -1922,21 +1924,40 @@ def keys(self) -> Index:
19221924
"""
19231925
return self.index
19241926

1925-
def to_dict(self, into: type[dict] = dict) -> dict:
1927+
@overload
1928+
def to_dict(
1929+
self, *, into: type[MutableMappingT] | MutableMappingT
1930+
) -> MutableMappingT:
1931+
...
1932+
1933+
@overload
1934+
def to_dict(self, *, into: type[dict] = ...) -> dict:
1935+
...
1936+
1937+
# error: Incompatible default for argument "into" (default has type "type[
1938+
# dict[Any, Any]]", argument has type "type[MutableMappingT] | MutableMappingT")
1939+
@deprecate_nonkeyword_arguments(
1940+
version="3.0", allowed_args=["self"], name="to_dict"
1941+
)
1942+
def to_dict(
1943+
self,
1944+
into: type[MutableMappingT]
1945+
| MutableMappingT = dict, # type: ignore[assignment]
1946+
) -> MutableMappingT:
19261947
"""
19271948
Convert Series to {label -> value} dict or dict-like object.
19281949
19291950
Parameters
19301951
----------
19311952
into : class, default dict
1932-
The collections.abc.Mapping subclass to use as the return
1933-
object. Can be the actual class or an empty
1934-
instance of the mapping type you want. If you want a
1935-
collections.defaultdict, you must pass it initialized.
1953+
The collections.abc.MutableMapping subclass to use as the return
1954+
object. Can be the actual class or an empty instance of the mapping
1955+
type you want. If you want a collections.defaultdict, you must
1956+
pass it initialized.
19361957
19371958
Returns
19381959
-------
1939-
collections.abc.Mapping
1960+
collections.abc.MutableMapping
19401961
Key-value representation of Series.
19411962
19421963
Examples
@@ -1945,10 +1966,10 @@ def to_dict(self, into: type[dict] = dict) -> dict:
19451966
>>> s.to_dict()
19461967
{0: 1, 1: 2, 2: 3, 3: 4}
19471968
>>> from collections import OrderedDict, defaultdict
1948-
>>> s.to_dict(OrderedDict)
1969+
>>> s.to_dict(into=OrderedDict)
19491970
OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
19501971
>>> dd = defaultdict(list)
1951-
>>> s.to_dict(dd)
1972+
>>> s.to_dict(into=dd)
19521973
defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
19531974
"""
19541975
# GH16122

pandas/tests/series/methods/test_to_dict.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,12 @@ class TestSeriesToDict:
1313
)
1414
def test_to_dict(self, mapping, datetime_series):
1515
# GH#16122
16-
result = Series(datetime_series.to_dict(mapping), name="ts")
16+
result = Series(datetime_series.to_dict(into=mapping), name="ts")
1717
expected = datetime_series.copy()
1818
expected.index = expected.index._with_freq(None)
1919
tm.assert_series_equal(result, expected)
2020

21-
from_method = Series(datetime_series.to_dict(collections.Counter))
21+
from_method = Series(datetime_series.to_dict(into=collections.Counter))
2222
from_constructor = Series(collections.Counter(datetime_series.items()))
2323
tm.assert_series_equal(from_method, from_constructor)
2424

0 commit comments

Comments
 (0)