Skip to content

CLN: to_dict #57159

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 31, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 0 additions & 23 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@
find_common_type,
infer_dtype_from_scalar,
invalidate_string_dtypes,
maybe_box_native,
maybe_downcast_to_dtype,
)
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -1983,28 +1982,6 @@ def to_numpy(

return result

def _create_data_for_split_and_tight_to_dict(
    self, are_all_object_dtype_cols: bool, object_dtype_indices: list[int]
) -> list:
    """
    Build the row-wise ``data`` payload shared by ``to_dict(orient="split")``
    and ``to_dict(orient="tight")``.

    Parameters
    ----------
    are_all_object_dtype_cols : bool
        If True, every value of every row is passed through
        ``maybe_box_native``.
    object_dtype_indices : list of int
        Positional column indices whose values are passed through
        ``maybe_box_native`` when ``are_all_object_dtype_cols`` is False.
        May be empty, in which case rows are returned unboxed.

    Returns
    -------
    list
        One ``list`` per row, in ``itertuples(index=False, name=None)`` order.
    """
    rows = self.itertuples(index=False, name=None)

    if are_all_object_dtype_cols:
        # Every column needs boxing, so map over each full row.
        return [list(map(maybe_box_native, row)) for row in rows]

    data = [list(row) for row in rows]
    if object_dtype_indices:
        # Only some columns need boxing: apply maybe_box_native after the
        # list comprehension (and only at those positions) for perf.
        for row in data:
            for i in object_dtype_indices:
                row[i] = maybe_box_native(row[i])
    return data

@overload
def to_dict(
self,
Expand Down
75 changes: 44 additions & 31 deletions pandas/core/methods/to_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,34 @@
from pandas.core import common as com

if TYPE_CHECKING:
from collections.abc import Generator

from pandas._typing import MutableMappingT

from pandas import DataFrame


def create_data_for_split(
    df: DataFrame, are_all_object_dtype_cols: bool, object_dtype_indices: list[int]
) -> Generator[list, None, None]:
    """
    Lazily yield the row-wise ``data`` payload for ``to_dict(orient="split")``.

    Parameters
    ----------
    df : DataFrame
        Frame whose rows are emitted, in
        ``itertuples(index=False, name=None)`` order.
    are_all_object_dtype_cols : bool
        If True, every value of every row is passed through
        ``maybe_box_native``.
    object_dtype_indices : list of int
        Positional column indices whose values are passed through
        ``maybe_box_native`` when ``are_all_object_dtype_cols`` is False.
        May be empty, in which case rows are yielded unboxed.

    Yields
    ------
    list
        One freshly built ``list`` per row.
    """
    rows = df.itertuples(index=False, name=None)

    if are_all_object_dtype_cols:
        # Every column needs boxing, so map over each full row.
        for tup in rows:
            yield list(map(maybe_box_native, tup))
    elif object_dtype_indices:
        # Only some columns need boxing: copy the row first, then apply
        # maybe_box_native at just those positions for perf.
        for tup in rows:
            data = list(tup)
            for i in object_dtype_indices:
                data[i] = maybe_box_native(data[i])
            yield data
    else:
        # Nothing to box; the emptiness check is loop-invariant, so it is
        # decided once here rather than once per row.
        for tup in rows:
            yield list(tup)


@overload
def to_dict(
df: DataFrame,
Expand Down Expand Up @@ -152,39 +175,38 @@ def to_dict(
# GH46470 Return quickly if orient series to avoid creating dtype objects
return into_c((k, v) for k, v in df.items())

if orient == "dict":
return into_c((k, v.to_dict(into=into)) for k, v in df.items())

box_native_indices = [
i
for i, col_dtype in enumerate(df.dtypes.values)
if col_dtype == np.dtype(object) or isinstance(col_dtype, ExtensionDtype)
]
box_na_values = [
lib.no_default if not isinstance(col_dtype, BaseMaskedDtype) else libmissing.NA
for i, col_dtype in enumerate(df.dtypes.values)
]
are_all_object_dtype_cols = len(box_native_indices) == len(df.dtypes)

if orient == "dict":
return into_c((k, v.to_dict(into=into)) for k, v in df.items())
are_all_object_dtype_cols = len(box_native_indices) == len(df.dtypes)

elif orient == "list":
if orient == "list":
object_dtype_indices_as_set: set[int] = set(box_native_indices)
box_na_values = (
lib.no_default
if not isinstance(col_dtype, BaseMaskedDtype)
else libmissing.NA
for col_dtype in df.dtypes.values
)
return into_c(
(
k,
list(
map(
maybe_box_native, v.to_numpy(na_value=box_na_values[i]).tolist()
)
)
list(map(maybe_box_native, v.to_numpy(na_value=box_na_value).tolist()))
if i in object_dtype_indices_as_set
else v.to_numpy().tolist(),
)
for i, (k, v) in enumerate(df.items())
for i, (box_na_value, (k, v)) in enumerate(zip(box_na_values, df.items()))
)

elif orient == "split":
data = df._create_data_for_split_and_tight_to_dict(
are_all_object_dtype_cols, box_native_indices
data = list(
create_data_for_split(df, are_all_object_dtype_cols, box_native_indices)
)

return into_c(
Expand All @@ -196,10 +218,6 @@ def to_dict(
)

elif orient == "tight":
data = df._create_data_for_split_and_tight_to_dict(
are_all_object_dtype_cols, box_native_indices
)

return into_c(
((("index", df.index.tolist()),) if index else ())
+ (
Expand All @@ -219,11 +237,9 @@ def to_dict(
elif orient == "records":
columns = df.columns.tolist()
if are_all_object_dtype_cols:
rows = (
dict(zip(columns, row)) for row in df.itertuples(index=False, name=None)
)
return [
into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows
into_c(zip(columns, map(maybe_box_native, row)))
for row in df.itertuples(index=False, name=None)
]
else:
data = [
Expand Down Expand Up @@ -252,24 +268,21 @@ def to_dict(
)
elif box_native_indices:
object_dtype_indices_as_set = set(box_native_indices)
is_object_dtype_by_index = [
i in object_dtype_indices_as_set for i in range(len(df.columns))
]
return into_c(
(
t[0],
{
columns[i]: maybe_box_native(v)
if is_object_dtype_by_index[i]
column: maybe_box_native(v)
if i in object_dtype_indices_as_set
else v
for i, v in enumerate(t[1:])
for i, (column, v) in enumerate(zip(columns, t[1:]))
},
)
for t in df.itertuples(name=None)
)
else:
return into_c(
(t[0], dict(zip(df.columns, t[1:]))) for t in df.itertuples(name=None)
(t[0], dict(zip(columns, t[1:]))) for t in df.itertuples(name=None)
)

else:
Expand Down