Skip to content

Commit 5cad4d2

Browse files
committed
ENH: Additional keyword arguments for Index.copy()
* Index derivatives can set `name` or `names` as well as `dtype` on copy. MultiIndex can set `levels`, `labels`, and `names`. * Also, `__deepcopy__` just calls `copy(deep=True)` * Now, BlockManager.copy() takes an additional argument `copy_axes` which copies axes as well. Defaults to False. * `Series.copy()` takes an optional deep argument, which causes it to copy its index. * `DataFrame.copy()` passes `copy_axes=True` when deepcopying. * Add copy kwarg to MultiIndex `__new__`
1 parent 033a932 commit 5cad4d2

File tree

5 files changed

+163
-54
lines changed

5 files changed

+163
-54
lines changed

doc/source/release.rst

+3
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ pandas 0.13
4747
- Added a more informative error message when plot arguments contain
4848
overlapping color and style arguments (:issue:`4402`)
4949
- Significant table writing performance improvements in ``HDFStore``
50+
- ``Index.copy()`` and ``MultiIndex.copy()`` now accept keyword arguments to
51+
change attributes (e.g., ``names``, ``levels``, ``labels``)
52+
(:issue:`4039`)
5053
- Add ``rename`` and ``set_names`` methods to ``Index`` as well as
5154
``set_names``, ``set_levels``, ``set_labels`` to ``MultiIndex``.
5255
(:issue:`4039`)

pandas/core/index.py

+83-24
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,12 @@ def __new__(cls, data, dtype=None, copy=False, name=None, **kwargs):
110110
return Int64Index(data, copy=copy, dtype=dtype, name=name)
111111

112112
subarr = com._asarray_tuplesafe(data, dtype=object)
113+
114+
# _asarray_tuplesafe does not always copy underlying data,
115+
# so need to make sure that this happens
116+
if copy:
117+
subarr = subarr.copy()
118+
113119
elif np.isscalar(data):
114120
raise TypeError('Index(...) must be called with a collection '
115121
'of some kind, %s was passed' % repr(data))
@@ -120,7 +126,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, **kwargs):
120126
if dtype is None:
121127
inferred = lib.infer_dtype(subarr)
122128
if inferred == 'integer':
123-
return Int64Index(subarr.astype('i8'), name=name)
129+
return Int64Index(subarr.astype('i8'), copy=copy, name=name)
124130
elif inferred != 'string':
125131
if (inferred.startswith('datetime') or
126132
tslib.is_timestamp_array(subarr)):
@@ -145,6 +151,41 @@ def __array_finalize__(self, obj):
145151
def _shallow_copy(self):
146152
return self.view()
147153

154+
def copy(self, names=None, name=None, dtype=None, deep=False):
155+
"""
156+
Make a copy of this object. ``name`` and ``dtype`` set those attributes on
157+
the new object.
158+
159+
Parameters
160+
----------
161+
name : string, optional
162+
dtype : numpy dtype or pandas type
163+
164+
Returns
165+
-------
166+
copy : Index
167+
168+
Notes
169+
-----
170+
In most cases, there should be no functional difference from using
171+
``deep``, but if ``deep`` is passed it will attempt to deepcopy.
172+
"""
173+
if names is not None and name is not None:
174+
raise TypeError("Can only provide one of `names` and `name`")
175+
if deep:
176+
from copy import deepcopy
177+
new_index = np.ndarray.__deepcopy__(self, {}).view(self.__class__)
178+
name = name or deepcopy(self.name)
179+
else:
180+
new_index = super(Index, self).copy()
181+
if name is not None:
182+
names = [name]
183+
if names:
184+
new_index = new_index.set_names(names)
185+
if dtype:
186+
new_index = new_index.astype(dtype)
187+
return new_index
188+
148189
def __unicode__(self):
149190
"""
150191
Return a string representation for a particular Index
@@ -338,10 +379,7 @@ def __setstate__(self, state):
338379
np.ndarray.__setstate__(self, state)
339380

340381
def __deepcopy__(self, memo={}):
341-
"""
342-
Index is not mutable, so disabling deepcopy
343-
"""
344-
return self._shallow_copy()
382+
return self.copy(deep=True)
345383

346384
def __contains__(self, key):
347385
hash(key)
@@ -1440,9 +1478,9 @@ class MultiIndex(Index):
14401478
14411479
Parameters
14421480
----------
1443-
levels : list or tuple of arrays
1481+
levels : sequence of arrays
14441482
The unique labels for each level
1445-
labels : list or tuple of arrays
1483+
labels : sequence of arrays
14461484
Integers for each level designating which label at each location
14471485
sortorder : optional int
14481486
Level of sortedness (must be lexicographically sorted by that
@@ -1455,7 +1493,8 @@ class MultiIndex(Index):
14551493
_levels = FrozenList()
14561494
_labels = FrozenList()
14571495

1458-
def __new__(cls, levels=None, labels=None, sortorder=None, names=None):
1496+
def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
1497+
copy=False):
14591498
if len(levels) != len(labels):
14601499
raise ValueError(
14611500
'Length of levels and labels must be the same')
@@ -1467,12 +1506,12 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None):
14671506
else:
14681507
name = None
14691508

1470-
return Index(levels[0], name=name).take(labels[0])
1509+
return Index(levels[0], name=name, copy=True).take(labels[0])
14711510

14721511
# v3, 0.8.0
14731512
subarr = np.empty(0, dtype=object).view(cls)
1474-
subarr._set_levels(levels)
1475-
subarr._set_labels(labels)
1513+
subarr._set_levels(levels, copy=copy)
1514+
subarr._set_labels(labels, copy=copy)
14761515

14771516
if names is not None:
14781517
subarr._set_names(names)
@@ -1489,13 +1528,13 @@ def _get_levels(self):
14891528
return self._levels
14901529

14911530

1492-
def _set_levels(self, levels):
1531+
def _set_levels(self, levels, copy=False):
14931532
# This is NOT part of the levels property because it should be
14941533
# externally not allowed to set levels. User beware if you change
14951534
# _levels directly
14961535
if len(levels) == 0:
14971536
raise ValueError("Must set non-zero number of levels.")
1498-
levels = FrozenList(_ensure_index(lev)._shallow_copy()
1537+
levels = FrozenList(_ensure_index(lev, copy=copy)._shallow_copy()
14991538
for lev in levels)
15001539
names = self.names
15011540
self._levels = levels
@@ -1534,10 +1573,11 @@ def set_levels(self, levels, inplace=False):
15341573
def _get_labels(self):
15351574
return self._labels
15361575

1537-
def _set_labels(self, labels):
1576+
def _set_labels(self, labels, copy=False):
15381577
if len(labels) != self.nlevels:
15391578
raise ValueError("Length of levels and labels must be the same.")
1540-
self._labels = FrozenList(_ensure_frozen(labs)._shallow_copy() for labs in labels)
1579+
self._labels = FrozenList(_ensure_frozen(labs,copy=copy)._shallow_copy()
1580+
for labs in labels)
15411581

15421582
def set_labels(self, labels, inplace=False):
15431583
"""
@@ -1546,8 +1586,8 @@ def set_labels(self, labels, inplace=False):
15461586
15471587
Parameters
15481588
----------
1549-
labels : sequence
1550-
new levels to apply
1589+
labels : sequence of arrays
1590+
new labels to apply
15511591
inplace : bool
15521592
if True, mutates in place
15531593
@@ -1592,6 +1632,11 @@ def copy(self, names=None, dtype=None, levels=None, labels=None,
15921632
This could be potentially expensive on large MultiIndex objects.
15931633
"""
15941634
new_index = np.ndarray.copy(self)
1635+
if deep:
1636+
from copy import deepcopy
1637+
levels = levels if levels is not None else deepcopy(self.levels)
1638+
labels = labels if labels is not None else deepcopy(self.labels)
1639+
names = names if names is not None else deepcopy(self.names)
15951640
if levels is not None:
15961641
new_index = new_index.set_levels(levels)
15971642
if labels is not None:
@@ -2831,11 +2876,13 @@ def _sparsify(label_list, start=0,sentinal=''):
28312876
return lzip(*result)
28322877

28332878

2834-
def _ensure_index(index_like):
2879+
def _ensure_index(index_like, copy=False):
28352880
if isinstance(index_like, Index):
2881+
if copy:
2882+
index_like = index_like.copy()
28362883
return index_like
28372884
if hasattr(index_like, 'name'):
2838-
return Index(index_like, name=index_like.name)
2885+
return Index(index_like, name=index_like.name, copy=copy)
28392886

28402887
# must check for exactly list here because of strict type
28412888
# check in clean_index_list
@@ -2849,15 +2896,27 @@ def _ensure_index(index_like):
28492896
return MultiIndex.from_arrays(converted)
28502897
else:
28512898
index_like = converted
2899+
else:
2900+
# clean_index_list does the equivalent of copying
2901+
# so only need to do this if not list instance
2902+
if copy:
2903+
from copy import copy
2904+
index_like = copy(index_like)
28522905

28532906
return Index(index_like)
28542907

2855-
def _ensure_frozen(nd_array_like):
2856-
if isinstance(nd_array_like, FrozenNDArray):
2857-
return nd_array_like
2858-
else:
2908+
2909+
def _ensure_frozen(nd_array_like, copy=False):
2910+
if not isinstance(nd_array_like, FrozenNDArray):
28592911
arr = np.asarray(nd_array_like, dtype=np.int_)
2860-
return arr.view(FrozenNDArray)
2912+
# have to do this separately so that non-index input gets copied
2913+
if copy:
2914+
arr = arr.copy()
2915+
nd_array_like = arr.view(FrozenNDArray)
2916+
else:
2917+
if copy:
2918+
nd_array_like = nd_array_like.copy()
2919+
return nd_array_like
28612920

28622921

28632922
def _validate_join_method(method):

pandas/core/series.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -1309,16 +1309,31 @@ def values(self):
13091309
"""
13101310
return self.view(ndarray)
13111311

1312-
def copy(self, order='C'):
1312+
def copy(self, order='C', deep=False):
13131313
"""
13141314
Return new Series with copy of underlying values
13151315
1316+
Parameters
1317+
----------
1318+
deep : boolean, default False
1319+
deep copy index along with data
1320+
order : string, default 'C'
1321+
order for underlying numpy array
1322+
13161323
Returns
13171324
-------
13181325
cp : Series
13191326
"""
1320-
return Series(self.values.copy(order), index=self.index,
1321-
name=self.name)
1327+
if deep:
1328+
from copy import deepcopy
1329+
index = self.index.copy(deep=deep)
1330+
name = deepcopy(self.name)
1331+
else:
1332+
index = self.index
1333+
name = self.name
1334+
1335+
return Series(self.values.copy(order), index=index,
1336+
name=name)
13221337

13231338
def tolist(self):
13241339
"""

pandas/tests/test_common.py

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from pandas.tslib import iNaT
1010

1111
from pandas import Series, DataFrame, date_range, DatetimeIndex, Timestamp
12+
import pandas.compat as compat
1213
from pandas.compat import range, long, lrange, lmap, u
1314
from pandas.core.common import notnull, isnull
1415
import pandas.compat as compat

0 commit comments

Comments
 (0)