Skip to content

Commit aa789ea

Browse files
authored
ENH: Add lazy copy to to_timestamp and to_period (#50575)
1 parent d00b945 commit aa789ea

File tree

4 files changed

+57
-22
lines changed

4 files changed

+57
-22
lines changed

pandas/core/frame.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -10918,7 +10918,7 @@ def to_timestamp(
1091810918
freq: Frequency | None = None,
1091910919
how: str = "start",
1092010920
axis: Axis = 0,
10921-
copy: bool = True,
10921+
copy: bool | None = None,
1092210922
) -> DataFrame:
1092310923
"""
1092410924
Cast to DatetimeIndex of timestamps, at *beginning* of period.
@@ -10953,7 +10953,7 @@ def to_timestamp(
1095310953
return new_obj
1095410954

1095510955
def to_period(
10956-
self, freq: Frequency | None = None, axis: Axis = 0, copy: bool = True
10956+
self, freq: Frequency | None = None, axis: Axis = 0, copy: bool | None = None
1095710957
) -> DataFrame:
1095810958
"""
1095910959
Convert DataFrame from DatetimeIndex to PeriodIndex.

pandas/core/series.py

+10-16
Original file line numberDiff line numberDiff line change
@@ -5623,7 +5623,7 @@ def to_timestamp(
56235623
self,
56245624
freq=None,
56255625
how: Literal["s", "e", "start", "end"] = "start",
5626-
copy: bool = True,
5626+
copy: bool | None = None,
56275627
) -> Series:
56285628
"""
56295629
Cast to DatetimeIndex of Timestamps, at *beginning* of period.
@@ -5642,18 +5642,15 @@ def to_timestamp(
56425642
-------
56435643
Series with DatetimeIndex
56445644
"""
5645-
new_values = self._values
5646-
if copy:
5647-
new_values = new_values.copy()
5648-
56495645
if not isinstance(self.index, PeriodIndex):
56505646
raise TypeError(f"unsupported Type {type(self.index).__name__}")
5647+
5648+
new_obj = self.copy(deep=copy)
56515649
new_index = self.index.to_timestamp(freq=freq, how=how)
5652-
return self._constructor(new_values, index=new_index).__finalize__(
5653-
self, method="to_timestamp"
5654-
)
5650+
setattr(new_obj, "index", new_index)
5651+
return new_obj
56555652

5656-
def to_period(self, freq: str | None = None, copy: bool = True) -> Series:
5653+
def to_period(self, freq: str | None = None, copy: bool | None = None) -> Series:
56575654
"""
56585655
Convert Series from DatetimeIndex to PeriodIndex.
56595656
@@ -5669,16 +5666,13 @@ def to_period(self, freq: str | None = None, copy: bool = True) -> Series:
56695666
Series
56705667
Series with index converted to PeriodIndex.
56715668
"""
5672-
new_values = self._values
5673-
if copy:
5674-
new_values = new_values.copy()
5675-
56765669
if not isinstance(self.index, DatetimeIndex):
56775670
raise TypeError(f"unsupported Type {type(self.index).__name__}")
5671+
5672+
new_obj = self.copy(deep=copy)
56785673
new_index = self.index.to_period(freq=freq)
5679-
return self._constructor(new_values, index=new_index).__finalize__(
5680-
self, method="to_period"
5681-
)
5674+
setattr(new_obj, "index", new_index)
5675+
return new_obj
56825676

56835677
@overload
56845678
def ffill(

pandas/tests/copy_view/test_methods.py

+38
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
DataFrame,
66
Index,
77
MultiIndex,
8+
Period,
89
Series,
10+
Timestamp,
911
date_range,
1012
)
1113
import pandas._testing as tm
@@ -374,6 +376,42 @@ def test_chained_methods(request, method, idx, using_copy_on_write):
374376
tm.assert_frame_equal(df2.iloc[:, idx:], df_orig)
375377

376378

379+
@pytest.mark.parametrize("obj", [Series([1, 2], name="a"), DataFrame({"a": [1, 2]})])
def test_to_timestamp(using_copy_on_write, obj):
    """
    Check the memory-sharing behaviour of ``to_timestamp``.

    The result is expected to share its "a" data with the caller only when
    ``using_copy_on_write`` is set; after writing into the result the data
    must no longer be shared and the caller must be unchanged.
    """
    periods = [Period("2012-1-1", freq="D"), Period("2012-1-2", freq="D")]
    obj.index = Index(periods)

    # snapshot taken before the conversion, used for the final comparison
    expected = obj.copy()
    result = obj.to_timestamp()

    shares = np.shares_memory(get_array(result, "a"), get_array(obj, "a"))
    assert bool(shares) == bool(using_copy_on_write)

    # writing into the result must leave it detached from the original data
    result.iloc[0] = 0
    shares_after_write = np.shares_memory(
        get_array(result, "a"), get_array(obj, "a")
    )
    assert not shares_after_write
    tm.assert_equal(obj, expected)
395+
396+
397+
@pytest.mark.parametrize("obj", [Series([1, 2], name="a"), DataFrame({"a": [1, 2]})])
def test_to_period(using_copy_on_write, obj):
    """
    Check the memory-sharing behaviour of ``to_period``.

    The result is expected to share its "a" data with the caller only when
    ``using_copy_on_write`` is set; after writing into the result the data
    must no longer be shared and the caller must be unchanged.
    """
    stamps = [Timestamp("2019-12-31"), Timestamp("2020-12-31")]
    obj.index = Index(stamps)

    # snapshot taken before the conversion, used for the final comparison
    expected = obj.copy()
    result = obj.to_period(freq="Y")

    shares = np.shares_memory(get_array(result, "a"), get_array(obj, "a"))
    assert bool(shares) == bool(using_copy_on_write)

    # writing into the result must leave it detached from the original data
    result.iloc[0] = 0
    shares_after_write = np.shares_memory(
        get_array(result, "a"), get_array(obj, "a")
    )
    assert not shares_after_write
    tm.assert_equal(obj, expected)
413+
414+
377415
def test_set_index(using_copy_on_write):
378416
# GH 49473
379417
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})

pandas/tests/copy_view/util.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
1+
from pandas import Series
12
from pandas.core.arrays import BaseMaskedArray
23

34

4-
def get_array(obj, col):
    """
    Helper method to get array for a DataFrame column or a Series.

    Equivalent of df[col].values, but without going through normal getitem,
    which triggers tracking references / CoW (and we might be testing that
    this is done by some other operation).

    Parameters
    ----------
    obj : DataFrame or Series
        Object to pull the array from. A Series is only handled directly
        when its ``name`` equals ``col``; anything else is treated as a
        DataFrame and looked up through ``obj.columns``.
    col : label
        Column label (or Series name) identifying the array.

    Returns
    -------
    array
        The stored array. For a ``BaseMaskedArray`` the underlying ``_data``
        array is returned instead, for both the Series and DataFrame paths.
    """
    if isinstance(obj, Series) and obj.name == col:
        arr = obj._values
    else:
        icol = obj.columns.get_loc(col)
        # a non-int location (slice / mask) would mean duplicate labels
        assert isinstance(icol, int)
        arr = obj._get_column_array(icol)
    # Unwrap masked arrays on every path so callers always receive the data
    # buffer; previously a masked Series was returned still wrapped.
    if isinstance(arr, BaseMaskedArray):
        return arr._data
    return arr

0 commit comments

Comments
 (0)