Skip to content

Commit 0b4fdf9

Browse files
bmcfee authored and jreback committed
ENH: Add __copy__ and __deepcopy__ to NDFrame
closes #15370

Author: Brian McFee <[email protected]>
Author: Jeff Reback <[email protected]>

Closes #15444 from bmcfee/deepcopy-ndframe and squashes the following commits:

- bf36f35 [Jeff Reback] TST: skip the panel4d deepcopy tests
- d58b1f6 [Brian McFee] added tests for copy and deepcopy
- 35f3e0f [Brian McFee] relocated Index.__deepcopy__ to live near __copy__
- 1aea940 [Brian McFee] switched deepcopy test to using generic comparator
- 7e67e7d [Brian McFee] ndframe and index __copy__ are now proper methods
- 820664c [Brian McFee] moved deepcopy test to generic.py
- 9721041 [Brian McFee] added copy/deepcopy to ndframe, fixes #15370
1 parent 12f2c6a commit 0b4fdf9

File tree

4 files changed

+41
-7
lines changed

4 files changed

+41
-7
lines changed

doc/source/whatsnew/v0.20.0.txt

+2-1
Original file line number | Diff line number | Diff line change
@@ -433,6 +433,7 @@ Other API Changes
433433
- ``DataFrame.asof()`` will return a null-filled ``Series`` instead of the scalar ``NaN`` if a match is not found (:issue:`15118`)
434434
- The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision loss for integers greater than 2**53. Furthermore ``FLOAT`` columns with values above 10**4 are no longer cast to ``int64``, which also caused precision loss (:issue:`14064`, :issue:`14305`).
435435
- Reorganization of timeseries development tests (:issue:`14854`)
436+
- Specific support for ``copy.copy()`` and ``copy.deepcopy()`` functions on NDFrame objects (:issue:`15444`)
436437

437438
.. _whatsnew_0200.deprecations:
438439

@@ -500,7 +501,7 @@ Bug Fixes
500501
- Bug in ``pd.read_csv()`` in which a file containing a row with many columns followed by rows with fewer columns would cause a crash (:issue:`14125`)
501502
- Bug in ``pd.tools.hashing.hash_pandas_object()`` in which hashing of categoricals depended on the ordering of categories, instead of just their values. (:issue:`15143`)
502503
- Bug in ``.groupby(..).resample()`` when passed the ``on=`` kwarg. (:issue:`15021`)
503-
504+
- Bug in using ``__deepcopy__`` on empty NDFrame objects (:issue:`15370`)
504505
- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`, :issue:`15424`)
505506
- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a numpy array (:issue:`15434`)
506507

pandas/core/generic.py

+8
Original file line number | Diff line number | Diff line change
@@ -3161,6 +3161,14 @@ def copy(self, deep=True):
31613161
data = self._data.copy(deep=deep)
31623162
return self._constructor(data).__finalize__(self)
31633163

3164+
def __copy__(self, deep=True):
3165+
return self.copy(deep=deep)
3166+
3167+
def __deepcopy__(self, memo=None):
3168+
if memo is None:
3169+
memo = {}
3170+
return self.copy(deep=True)
3171+
31643172
def _convert(self, datetime=False, numeric=False, timedelta=False,
31653173
coerce=False, copy=True):
31663174
"""

pandas/indexes/base.py

+7-6
Original file line number | Diff line number | Diff line change
@@ -724,7 +724,13 @@ def copy(self, name=None, deep=False, dtype=None, **kwargs):
724724
new_index = new_index.astype(dtype)
725725
return new_index
726726

727-
__copy__ = copy
727+
def __copy__(self, **kwargs):
728+
return self.copy(**kwargs)
729+
730+
def __deepcopy__(self, memo=None):
731+
if memo is None:
732+
memo = {}
733+
return self.copy(deep=True)
728734

729735
def _validate_names(self, name=None, names=None, deep=False):
730736
"""
@@ -1480,11 +1486,6 @@ def __setstate__(self, state):
14801486

14811487
_unpickle_compat = __setstate__
14821488

1483-
def __deepcopy__(self, memo=None):
1484-
if memo is None:
1485-
memo = {}
1486-
return self.copy(deep=True)
1487-
14881489
def __nonzero__(self):
14891490
raise ValueError("The truth value of a {0} is ambiguous. "
14901491
"Use a.empty, a.bool(), a.item(), a.any() or a.all()."

pandas/tests/test_generic.py

+24
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22
# pylint: disable-msg=E1101,W0612
33

44
from operator import methodcaller
5+
from copy import copy, deepcopy
56
import pytest
67
import numpy as np
78
from numpy import nan
@@ -675,6 +676,18 @@ def test_validate_bool_args(self):
675676
with self.assertRaises(ValueError):
676677
super(DataFrame, df).mask(cond=df.a > 2, inplace=value)
677678

679+
def test_copy_and_deepcopy(self):
680+
# GH 15444
681+
for shape in [0, 1, 2]:
682+
obj = self._construct(shape)
683+
for func in [copy,
684+
deepcopy,
685+
lambda x: x.copy(deep=False),
686+
lambda x: x.copy(deep=True)]:
687+
obj_copy = func(obj)
688+
self.assertIsNot(obj_copy, obj)
689+
self._compare(obj_copy, obj)
690+
678691

679692
class TestSeries(tm.TestCase, Generic):
680693
_typ = Series
@@ -1539,6 +1552,14 @@ def test_to_xarray(self):
15391552
expected,
15401553
check_index_type=False)
15411554

1555+
def test_deepcopy_empty(self):
1556+
# This test covers empty frame copying with non-empty column sets
1557+
# as reported in issue GH15370
1558+
empty_frame = DataFrame(data=[], index=[], columns=['A'])
1559+
empty_frame_copy = deepcopy(empty_frame)
1560+
1561+
self._compare(empty_frame_copy, empty_frame)
1562+
15421563

15431564
class TestPanel(tm.TestCase, Generic):
15441565
_typ = Panel
@@ -1569,6 +1590,9 @@ class TestPanel4D(tm.TestCase, Generic):
15691590
def test_sample(self):
15701591
pytest.skip("sample on Panel4D")
15711592

1593+
def test_copy_and_deepcopy(self):
1594+
pytest.skip("copy_and_deepcopy on Panel4D")
1595+
15721596
def test_to_xarray(self):
15731597

15741598
tm._skip_if_no_xarray()

0 commit comments

Comments
 (0)