Skip to content

Commit f932bf9

Browse files
authored
Fix Series.to_frame(name=None) setting a None name (#16698)
In pandas 2.0, `to_frame(name=None)` began allowing the resulting column name to be `None` (see pandas-dev/pandas#45523). `cudf.Series.to_frame` did not reflect this behavior: its `name` parameter defaulted to `None`, so an explicitly passed `name=None` could not be distinguished from an omitted argument and fell back to the Series name (or `0`). The default is now the `no_default` sentinel, so `name=None` yields a column named `None`. Additionally, this change adds `SingleColumnFrame._to_frame` so that `Series.to_frame` and `Index.to_frame` share the same logic. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: #16698
1 parent 8f2d687 commit f932bf9

File tree

5 files changed

+77
-68
lines changed

5 files changed

+77
-68
lines changed

python/cudf/cudf/core/_base_index.py

Lines changed: 0 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -798,64 +798,6 @@ def fillna(self, value, downcast=None):
798798

799799
return super().fillna(value=value)
800800

801-
def to_frame(self, index=True, name=no_default):
802-
"""Create a DataFrame with a column containing this Index
803-
804-
Parameters
805-
----------
806-
index : boolean, default True
807-
Set the index of the returned DataFrame as the original Index
808-
name : object, defaults to index.name
809-
The passed name should substitute for the index name (if it has
810-
one).
811-
812-
Returns
813-
-------
814-
DataFrame
815-
DataFrame containing the original Index data.
816-
817-
See Also
818-
--------
819-
Index.to_series : Convert an Index to a Series.
820-
Series.to_frame : Convert Series to DataFrame.
821-
822-
Examples
823-
--------
824-
>>> import cudf
825-
>>> idx = cudf.Index(['Ant', 'Bear', 'Cow'], name='animal')
826-
>>> idx.to_frame()
827-
animal
828-
animal
829-
Ant Ant
830-
Bear Bear
831-
Cow Cow
832-
833-
By default, the original Index is reused. To enforce a new Index:
834-
835-
>>> idx.to_frame(index=False)
836-
animal
837-
0 Ant
838-
1 Bear
839-
2 Cow
840-
841-
To override the name of the resulting column, specify `name`:
842-
843-
>>> idx.to_frame(index=False, name='zoo')
844-
zoo
845-
0 Ant
846-
1 Bear
847-
2 Cow
848-
"""
849-
850-
if name is no_default:
851-
col_name = 0 if self.name is None else self.name
852-
else:
853-
col_name = name
854-
855-
return cudf.DataFrame(
856-
{col_name: self._values}, index=self if index else None
857-
)
858-
859801
def to_arrow(self):
860802
"""Convert to a suitable Arrow object."""
861803
raise NotImplementedError

python/cudf/cudf/core/index.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,11 @@ def to_pandas(
529529
name=self.name,
530530
)
531531

532+
def to_frame(
533+
self, index: bool = True, name: Hashable = no_default
534+
) -> cudf.DataFrame:
535+
return self._as_int_index().to_frame(index=index, name=name)
536+
532537
@property
533538
def is_unique(self) -> bool:
534539
return True
@@ -1646,6 +1651,58 @@ def to_pandas(
16461651
result.name = self.name
16471652
return result
16481653

1654+
def to_frame(
1655+
self, index: bool = True, name: Hashable = no_default
1656+
) -> cudf.DataFrame:
1657+
"""Create a DataFrame with a column containing this Index
1658+
1659+
Parameters
1660+
----------
1661+
index : boolean, default True
1662+
Set the index of the returned DataFrame as the original Index
1663+
name : object, defaults to index.name
1664+
The passed name should substitute for the index name (if it has
1665+
one).
1666+
1667+
Returns
1668+
-------
1669+
DataFrame
1670+
DataFrame containing the original Index data.
1671+
1672+
See Also
1673+
--------
1674+
Index.to_series : Convert an Index to a Series.
1675+
Series.to_frame : Convert Series to DataFrame.
1676+
1677+
Examples
1678+
--------
1679+
>>> import cudf
1680+
>>> idx = cudf.Index(['Ant', 'Bear', 'Cow'], name='animal')
1681+
>>> idx.to_frame()
1682+
animal
1683+
animal
1684+
Ant Ant
1685+
Bear Bear
1686+
Cow Cow
1687+
1688+
By default, the original Index is reused. To enforce a new Index:
1689+
1690+
>>> idx.to_frame(index=False)
1691+
animal
1692+
0 Ant
1693+
1 Bear
1694+
2 Cow
1695+
1696+
To override the name of the resulting column, specify `name`:
1697+
1698+
>>> idx.to_frame(index=False, name='zoo')
1699+
zoo
1700+
0 Ant
1701+
1 Bear
1702+
2 Cow
1703+
"""
1704+
return self._to_frame(name=name, index=self if index else None)
1705+
16491706
def append(self, other):
16501707
if is_list_like(other):
16511708
to_concat = [self]

python/cudf/cudf/core/series.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1160,7 +1160,7 @@ def reset_index(
11601160
)
11611161

11621162
@_performance_tracking
1163-
def to_frame(self, name=None):
1163+
def to_frame(self, name: abc.Hashable = no_default) -> cudf.DataFrame:
11641164
"""Convert Series into a DataFrame
11651165
11661166
Parameters
@@ -1192,15 +1192,7 @@ def to_frame(self, name=None):
11921192
13 <NA>
11931193
15 d
11941194
""" # noqa: E501
1195-
1196-
if name is not None:
1197-
col = name
1198-
elif self.name is None:
1199-
col = 0
1200-
else:
1201-
col = self.name
1202-
1203-
return cudf.DataFrame({col: self._column}, index=self.index)
1195+
return self._to_frame(name=name, index=self.index)
12041196

12051197
@_performance_tracking
12061198
def memory_usage(self, index=True, deep=False):

python/cudf/cudf/core/single_column_frame.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,17 @@ def to_arrow(self) -> pa.Array:
158158
"""
159159
return self._column.to_arrow()
160160

161+
def _to_frame(
162+
self, name: Hashable, index: cudf.Index | None
163+
) -> cudf.DataFrame:
164+
"""Helper function for Series.to_frame, Index.to_frame"""
165+
if name is no_default:
166+
col_name = 0 if self.name is None else self.name
167+
else:
168+
col_name = name
169+
ca = ColumnAccessor({col_name: self._column}, verify=False)
170+
return cudf.DataFrame._from_data(ca, index=index)
171+
161172
@property # type: ignore
162173
@_performance_tracking
163174
def is_unique(self) -> bool:

python/cudf/cudf/tests/test_series.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2557,6 +2557,13 @@ def test_series_arrow_list_types_roundtrip():
25572557
cudf.from_pandas(pdf)
25582558

25592559

2560+
@pytest.mark.parametrize("base_name", [None, "a"])
2561+
def test_series_to_frame_none_name(base_name):
2562+
result = cudf.Series(range(1), name=base_name).to_frame(name=None)
2563+
expected = pd.Series(range(1), name=base_name).to_frame(name=None)
2564+
assert_eq(result, expected)
2565+
2566+
25602567
@pytest.mark.parametrize("klass", [cudf.Index, cudf.Series])
25612568
@pytest.mark.parametrize(
25622569
"data", [pa.array([float("nan")]), pa.chunked_array([[float("nan")]])]

0 commit comments

Comments
 (0)