From ecdd114f75231936995e3c02b96b8686a414dfaf Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Mon, 24 Sep 2018 22:39:57 -0400 Subject: [PATCH 1/7] DOC: fix a failing doctest in DataFrame.to_dict --- ci/doctests.sh | 2 +- pandas/core/frame.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ci/doctests.sh b/ci/doctests.sh index 48774a1e4d00d..37ee604f78123 100755 --- a/ci/doctests.sh +++ b/ci/doctests.sh @@ -21,7 +21,7 @@ if [ "$DOCTEST" ]; then # DataFrame / Series docstrings pytest --doctest-modules -v pandas/core/frame.py \ - -k"-axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" + -k"-axes -combine -isin -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_stata" if [ $? -ne "0" ]; then RET=1 diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 81d5c112885ec..135701e5850de 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1085,7 +1085,8 @@ def to_dict(self, orient='dict', into=dict): Returns ------- - result : collections.Mapping like {column -> {index -> value}} + collections.Mapping + like {column -> {index -> value}} See Also -------- @@ -1122,7 +1123,7 @@ def to_dict(self, orient='dict', into=dict): [{'col1': 1.0, 'col2': 0.5}, {'col1': 2.0, 'col2': 0.75}] >>> df.to_dict('index') - {'a': {'col1': 1.0, 'col2': 0.5}, 'b': {'col1': 2.0, 'col2': 0.75}} + {'a': {'col1': 1, 'col2': 0.5}, 'b': {'col1': 2, 'col2': 0.75}} You can also specify the mapping type. 
From fe9be1dece27cad9ed80a8bdc565d1edd62d3e6c Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Tue, 25 Sep 2018 21:50:38 -0400 Subject: [PATCH 2/7] Add a better example --- pandas/core/frame.py | 46 ++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 135701e5850de..112c87c76f15c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1090,54 +1090,54 @@ def to_dict(self, orient='dict', into=dict): See Also -------- - DataFrame.from_dict: create a DataFrame from a dictionary - DataFrame.to_json: convert a DataFrame to JSON format + DataFrame.from_dict: Create a DataFrame from a dictionary. + DataFrame.to_json: Convert a DataFrame to JSON format. Examples -------- - >>> df = pd.DataFrame({'col1': [1, 2], - ... 'col2': [0.5, 0.75]}, - ... index=['a', 'b']) + >>> df = pd.DataFrame({'legs': [4, 2], + ... 'wings': [0, 2]}, + ... index=['cat', 'falcon']) >>> df - col1 col2 - a 1 0.50 - b 2 0.75 + legs wings + cat 4 0 + falcon 2 2 >>> df.to_dict() - {'col1': {'a': 1, 'b': 2}, 'col2': {'a': 0.5, 'b': 0.75}} + {'legs': {'cat': 4, 'falcon': 2}, 'wings': {'cat': 0, 'falcon': 2}} You can specify the return orientation. 
>>> df.to_dict('series') - {'col1': a 1 - b 2 - Name: col1, dtype: int64, - 'col2': a 0.50 - b 0.75 - Name: col2, dtype: float64} + {'legs': cat 4 + falcon 2 + Name: legs, dtype: int64, + 'wings': cat 0 + falcon 2 + Name: wings, dtype: int64} >>> df.to_dict('split') - {'index': ['a', 'b'], 'columns': ['col1', 'col2'], - 'data': [[1.0, 0.5], [2.0, 0.75]]} + {'index': ['cat', 'falcon'], 'columns': ['legs', 'wings'], + 'data': [[4, 0], [2, 2]]} >>> df.to_dict('records') - [{'col1': 1.0, 'col2': 0.5}, {'col1': 2.0, 'col2': 0.75}] + [{'legs': 4, 'wings': 0}, {'legs': 2, 'wings': 2}] >>> df.to_dict('index') - {'a': {'col1': 1, 'col2': 0.5}, 'b': {'col1': 2, 'col2': 0.75}} + {'cat': {'legs': 4, 'wings': 0}, 'falcon': {'legs': 2, 'wings': 2}} You can also specify the mapping type. >>> from collections import OrderedDict, defaultdict >>> df.to_dict(into=OrderedDict) - OrderedDict([('col1', OrderedDict([('a', 1), ('b', 2)])), - ('col2', OrderedDict([('a', 0.5), ('b', 0.75)]))]) + OrderedDict([('legs', OrderedDict([('cat', 4), ('falcon', 2)])), + ('wings', OrderedDict([('cat', 0), ('falcon', 2)]))]) If you want a `defaultdict`, you need to initialize it: >>> dd = defaultdict(list) >>> df.to_dict('records', into=dd) - [defaultdict(<class 'list'>, {'col1': 1.0, 'col2': 0.5}), - defaultdict(<class 'list'>, {'col1': 2.0, 'col2': 0.75})] + [defaultdict(<class 'list'>, {'legs': 4, 'wings': 0}), + defaultdict(<class 'list'>, {'legs': 2, 'wings': 2})] """ if not self.columns.is_unique: warnings.warn("DataFrame columns are not unique, some " From 0ba11cae85aee1f99a4a1f13db9f219d7dfd0016 Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Fri, 28 Sep 2018 10:19:52 -0400 Subject: [PATCH 3/7] Add dict and list to the returned types --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index aa0e91b1235a0..e1f7b443c7713 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1085,7 +1085,7 @@ def to_dict(self, orient='dict', into=dict): Returns ------- - 
collections.Mapping + dict, list or collections.Mapping like {column -> {index -> value}} See Also From db130aedc4441d99ddf27670ebca81d9122984cf Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Fri, 28 Sep 2018 10:30:46 -0400 Subject: [PATCH 4/7] Revert to row/col example --- pandas/core/frame.py | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e1f7b443c7713..d51ec813e9346 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1095,49 +1095,49 @@ def to_dict(self, orient='dict', into=dict): Examples -------- - >>> df = pd.DataFrame({'legs': [4, 2], - ... 'wings': [0, 2]}, - ... index=['cat', 'falcon']) + >>> df = pd.DataFrame({'col1': [1, 2], + ... 'col2': [0.5, 0.75]}, + ... index=['a', 'b']) >>> df - legs wings - cat 4 0 - falcon 2 2 + col1 col2 + a 1 0.50 + b 2 0.75 >>> df.to_dict() - {'legs': {'cat': 4, 'falcon': 2}, 'wings': {'cat': 0, 'falcon': 2}} + {'col1': {'a': 1, 'b': 2}, 'col2': {'a': 0.5, 'b': 0.75}} You can specify the return orientation. >>> df.to_dict('series') - {'legs': cat 4 - falcon 2 - Name: legs, dtype: int64, - 'wings': cat 0 - falcon 2 - Name: wings, dtype: int64} + {'col1': a 1 + b 2 + Name: col1, dtype: int64, + 'col2': a 0.50 + b 0.75 + Name: col2, dtype: float64} >>> df.to_dict('split') - {'index': ['cat', 'falcon'], 'columns': ['legs', 'wings'], - 'data': [[4, 0], [2, 2]]} + {'index': ['a', 'b'], 'columns': ['col1', 'col2'], + 'data': [[1.0, 0.5], [2.0, 0.75]]} >>> df.to_dict('records') - [{'legs': 4, 'wings': 0}, {'legs': 2, 'wings': 2}] + [{'col1': 1.0, 'col2': 0.5}, {'col1': 2.0, 'col2': 0.75}] >>> df.to_dict('index') - {'cat': {'legs': 4, 'wings': 0}, 'falcon': {'legs': 2, 'wings': 2}} + {'a': {'col1': 1, 'col2': 0.5}, 'b': {'col1': 2, 'col2': 0.75}} You can also specify the mapping type. 
>>> from collections import OrderedDict, defaultdict >>> df.to_dict(into=OrderedDict) - OrderedDict([('legs', OrderedDict([('cat', 4), ('falcon', 2)])), - ('wings', OrderedDict([('cat', 0), ('falcon', 2)]))]) + OrderedDict([('col1', OrderedDict([('a', 1), ('b', 2)])), + ('col2', OrderedDict([('a', 0.5), ('b', 0.75)]))]) If you want a `defaultdict`, you need to initialize it: >>> dd = defaultdict(list) >>> df.to_dict('records', into=dd) - [defaultdict(<class 'list'>, {'legs': 4, 'wings': 0}), - defaultdict(<class 'list'>, {'legs': 2, 'wings': 2})] + [defaultdict(<class 'list'>, {'col1': 1.0, 'col2': 0.5}), + defaultdict(<class 'list'>, {'col1': 2.0, 'col2': 0.75})] """ if not self.columns.is_unique: warnings.warn("DataFrame columns are not unique, some " From 483117415788a6a9caf9366ca0fe7a141a2a26cc Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Sat, 29 Sep 2018 08:46:54 -0400 Subject: [PATCH 5/7] Use 'row1' and 'row2' instead of 'a' and 'b' --- pandas/core/frame.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d51ec813e9346..d3f6cacdfecef 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1097,40 +1097,40 @@ def to_dict(self, orient='dict', into=dict): -------- >>> df = pd.DataFrame({'col1': [1, 2], ... 'col2': [0.5, 0.75]}, - ... index=['a', 'b']) + ... index=['row1', 'row2']) >>> df - col1 col2 - a 1 0.50 - b 2 0.75 + col1 col2 + row1 1 0.50 + row2 2 0.75 >>> df.to_dict() - {'col1': {'a': 1, 'b': 2}, 'col2': {'a': 0.5, 'b': 0.75}} + {'col1': {'row1': 1, 'row2': 2}, 'col2': {'row1': 0.5, 'row2': 0.75}} You can specify the return orientation. 
>>> df.to_dict('series') - {'col1': a 1 - b 2 - Name: col1, dtype: int64, - 'col2': a 0.50 - b 0.75 - Name: col2, dtype: float64} + {'col1': row1 1 + row2 2 + Name: col1, dtype: int64, + 'col2': row1 0.50 + row2 0.75 + Name: col2, dtype: float64} >>> df.to_dict('split') - {'index': ['a', 'b'], 'columns': ['col1', 'col2'], + {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'], 'data': [[1.0, 0.5], [2.0, 0.75]]} >>> df.to_dict('records') [{'col1': 1.0, 'col2': 0.5}, {'col1': 2.0, 'col2': 0.75}] >>> df.to_dict('index') - {'a': {'col1': 1, 'col2': 0.5}, 'b': {'col1': 2, 'col2': 0.75}} + {'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}} You can also specify the mapping type. >>> from collections import OrderedDict, defaultdict >>> df.to_dict(into=OrderedDict) - OrderedDict([('col1', OrderedDict([('a', 1), ('b', 2)])), - ('col2', OrderedDict([('a', 0.5), ('b', 0.75)]))]) + OrderedDict([('col1', OrderedDict([('row1', 1), ('row2', 2)])), + ('col2', OrderedDict([('row1', 0.5), ('row2', 0.75)]))]) If you want a `defaultdict`, you need to initialize it: From e11abb10cc395c9dccbb6baa32fc3e86c273aeef Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Sat, 29 Sep 2018 12:32:31 -0400 Subject: [PATCH 6/7] Add a complete description in the Returns section --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d3f6cacdfecef..7e2f5ae5f6d30 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1086,7 +1086,8 @@ def to_dict(self, orient='dict', into=dict): Returns ------- dict, list or collections.Mapping - like {column -> {index -> value}} + Return a collections.Mapping object representing the DataFrame. + The resulting transformation depends on the `orient` parameter. 
See Also -------- From 64f6050c935ee88b0dcfe6356c65f94597cbe01b Mon Sep 17 00:00:00 2001 From: Thierry Moisan Date: Mon, 15 Oct 2018 09:33:53 -0400 Subject: [PATCH 7/7] Update CI to check pandas.DataFrame.to_dict doctests --- ci/code_checks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index eced3bf34e7c6..f2188e6bb56b8 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -118,7 +118,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then MSG='Doctests frame.py' ; echo $MSG pytest --doctest-modules -v pandas/core/frame.py \ - -k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata" + -k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_stata" RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Doctests series.py' ; echo $MSG