Skip to content

Commit d7d9a6c

Browse files
committed
Merge pull request #4830 from jtratner/copy-index-and-columns
BUG: Fix copy so that it always copies index/columns.
2 parents 54349d1 + 42d1d74 commit d7d9a6c

13 files changed

+116
-89
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,8 @@ Bug Fixes
455455
- Tests for fillna on empty Series (:issue:`4346`), thanks @immerrr
456456
- Fixed a bug where ``ValueError`` wasn't correctly raised when column names
457457
weren't strings (:issue:`4956`)
458+
- Fixed ``copy()`` to also shallow-copy the axes/indices, so that the copy keeps
459+
separate metadata. (:issue:`4202`, :issue:`4830`)
458460

459461
pandas 0.12.0
460462
-------------

pandas/core/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1988,7 +1988,7 @@ def transform(self, func, *args, **kwargs):
19881988

19891989
# broadcasting
19901990
if isinstance(res, Series):
1991-
if res.index is obj.index:
1991+
if res.index.is_(obj.index):
19921992
group.T.values[:] = res
19931993
else:
19941994
group.values[:] = res

pandas/core/index.py

+42-34
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
import pandas.core.common as com
1717
from pandas.core.common import _values_from_object
1818
from pandas.core.config import get_option
19-
import warnings
2019

2120

2221
__all__ = ['Index']
@@ -27,6 +26,7 @@ def _indexOp(opname):
2726
Wrapper function for index comparison operations, to avoid
2827
code duplication.
2928
"""
29+
3030
def wrapper(self, other):
3131
func = getattr(self.view(np.ndarray), opname)
3232
result = func(other)
@@ -54,6 +54,7 @@ def _shouldbe_timestamp(obj):
5454

5555

5656
class Index(FrozenNDArray):
57+
5758
"""
5859
Immutable ndarray implementing an ordered, sliceable set. The basic object
5960
storing axis labels for all pandas objects
@@ -160,7 +161,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
160161

161162
elif np.isscalar(data):
162163
raise TypeError('Index(...) must be called with a collection '
163-
'of some kind, %s was passed' % repr(data))
164+
'of some kind, %s was passed' % repr(data))
164165
else:
165166
# other iterable of some kind
166167
subarr = com._asarray_tuplesafe(data, dtype=object)
@@ -171,7 +172,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None, fastpath=False,
171172
return Int64Index(subarr.astype('i8'), copy=copy, name=name)
172173
elif inferred != 'string':
173174
if (inferred.startswith('datetime') or
174-
tslib.is_timestamp_array(subarr)):
175+
tslib.is_timestamp_array(subarr)):
175176
from pandas.tseries.index import DatetimeIndex
176177
return DatetimeIndex(data, copy=copy, name=name, **kwargs)
177178
elif inferred == 'period':
@@ -234,7 +235,7 @@ def to_series(self):
234235
useful with map for returning an indexer based on an index
235236
"""
236237
import pandas as pd
237-
return pd.Series(self.values,index=self,name=self.name)
238+
return pd.Series(self.values, index=self, name=self.name)
238239

239240
def astype(self, dtype):
240241
return Index(self.values.astype(dtype), name=self.name,
@@ -279,7 +280,7 @@ def _get_names(self):
279280
def _set_names(self, values):
280281
if len(values) != 1:
281282
raise ValueError('Length of new names must be 1, got %d'
282-
% len(values))
283+
% len(values))
283284
self.name = values[0]
284285

285286
names = property(fset=_set_names, fget=_get_names)
@@ -335,11 +336,11 @@ def _has_complex_internals(self):
335336
def summary(self, name=None):
336337
if len(self) > 0:
337338
head = self[0]
338-
if hasattr(head,'format') and\
339+
if hasattr(head, 'format') and\
339340
not isinstance(head, compat.string_types):
340341
head = head.format()
341342
tail = self[-1]
342-
if hasattr(tail,'format') and\
343+
if hasattr(tail, 'format') and\
343344
not isinstance(tail, compat.string_types):
344345
tail = tail.format()
345346
index_summary = ', %s to %s' % (com.pprint_thing(head),
@@ -571,7 +572,7 @@ def to_native_types(self, slicer=None, **kwargs):
571572
def _format_native_types(self, na_rep='', **kwargs):
572573
""" actually format my specific types """
573574
mask = isnull(self)
574-
values = np.array(self,dtype=object,copy=True)
575+
values = np.array(self, dtype=object, copy=True)
575576
values[mask] = na_rep
576577
return values.tolist()
577578

@@ -595,7 +596,7 @@ def identical(self, other):
595596
Similar to equals, but check that other comparable attributes are also equal
596597
"""
597598
return self.equals(other) and all(
598-
( getattr(self,c,None) == getattr(other,c,None) for c in self._comparables ))
599+
(getattr(self, c, None) == getattr(other, c, None) for c in self._comparables))
599600

600601
def asof(self, label):
601602
"""
@@ -886,7 +887,8 @@ def set_value(self, arr, key, value):
886887
Fast lookup of value from 1-dimensional ndarray. Only use this if you
887888
know what you're doing
888889
"""
889-
self._engine.set_value(_values_from_object(arr), _values_from_object(key), value)
890+
self._engine.set_value(
891+
_values_from_object(arr), _values_from_object(key), value)
890892

891893
def get_level_values(self, level):
892894
"""
@@ -1357,7 +1359,7 @@ def slice_locs(self, start=None, end=None):
13571359

13581360
# get_loc will return a boolean array for non_uniques
13591361
# if we are not monotonic
1360-
if isinstance(start_slice,np.ndarray):
1362+
if isinstance(start_slice, np.ndarray):
13611363
raise KeyError("cannot peform a slice operation "
13621364
"on a non-unique non-monotonic index")
13631365

@@ -1379,7 +1381,7 @@ def slice_locs(self, start=None, end=None):
13791381
if not is_unique:
13801382

13811383
# get_loc will return a boolean array for non_uniques
1382-
if isinstance(end_slice,np.ndarray):
1384+
if isinstance(end_slice, np.ndarray):
13831385
raise KeyError("cannot perform a slice operation "
13841386
"on a non-unique non-monotonic index")
13851387

@@ -1447,6 +1449,7 @@ def drop(self, labels):
14471449

14481450

14491451
class Int64Index(Index):
1452+
14501453
"""
14511454
Immutable ndarray implementing an ordered, sliceable set. The basic object
14521455
storing axis labels for all pandas objects. Int64Index is a special case of `Index`
@@ -1579,6 +1582,7 @@ def _wrap_joined_index(self, joined, other):
15791582

15801583

15811584
class MultiIndex(Index):
1585+
15821586
"""
15831587
Implements multi-level, a.k.a. hierarchical, index object for pandas
15841588
objects
@@ -1625,7 +1629,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
16251629
if names is not None:
16261630
subarr._set_names(names)
16271631

1628-
16291632
if sortorder is not None:
16301633
subarr.sortorder = int(sortorder)
16311634
else:
@@ -1636,7 +1639,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
16361639
def _get_levels(self):
16371640
return self._levels
16381641

1639-
16401642
def _set_levels(self, levels, copy=False):
16411643
# This is NOT part of the levels property because it should be
16421644
# externally not allowed to set levels. User beware if you change
@@ -1686,7 +1688,7 @@ def _get_labels(self):
16861688
def _set_labels(self, labels, copy=False):
16871689
if len(labels) != self.nlevels:
16881690
raise ValueError("Length of levels and labels must be the same.")
1689-
self._labels = FrozenList(_ensure_frozen(labs,copy=copy)._shallow_copy()
1691+
self._labels = FrozenList(_ensure_frozen(labs, copy=copy)._shallow_copy()
16901692
for labs in labels)
16911693

16921694
def set_labels(self, labels, inplace=False):
@@ -1811,13 +1813,13 @@ def _set_names(self, values):
18111813
values = list(values)
18121814
if len(values) != self.nlevels:
18131815
raise ValueError('Length of names (%d) must be same as level '
1814-
'(%d)' % (len(values),self.nlevels))
1816+
'(%d)' % (len(values), self.nlevels))
18151817
# set the name
18161818
for name, level in zip(values, self.levels):
18171819
level.rename(name, inplace=True)
18181820

1819-
1820-
names = property(fset=_set_names, fget=_get_names, doc="Names of levels in MultiIndex")
1821+
names = property(
1822+
fset=_set_names, fget=_get_names, doc="Names of levels in MultiIndex")
18211823

18221824
def _format_native_types(self, **kwargs):
18231825
return self.tolist()
@@ -1845,7 +1847,7 @@ def _get_level_number(self, level):
18451847
count = self.names.count(level)
18461848
if count > 1:
18471849
raise ValueError('The name %s occurs multiple times, use a '
1848-
'level number' % level)
1850+
'level number' % level)
18491851
level = self.names.index(level)
18501852
except ValueError:
18511853
if not isinstance(level, int):
@@ -1980,9 +1982,9 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
19801982
formatted = lev.take(lab).format(formatter=formatter)
19811983

19821984
# we have some NA
1983-
mask = lab==-1
1985+
mask = lab == -1
19841986
if mask.any():
1985-
formatted = np.array(formatted,dtype=object)
1987+
formatted = np.array(formatted, dtype=object)
19861988
formatted[mask] = na_rep
19871989
formatted = formatted.tolist()
19881990

@@ -2000,7 +2002,6 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
20002002
level.append(com.pprint_thing(name, escape_chars=('\t', '\r', '\n'))
20012003
if name is not None else '')
20022004

2003-
20042005
level.extend(np.array(lev, dtype=object))
20052006
result_levels.append(level)
20062007

@@ -2010,8 +2011,9 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
20102011
if sparsify:
20112012
sentinal = ''
20122013
# GH3547
2013-
# use value of sparsify as sentinal, unless it's an obvious "Truthey" value
2014-
if sparsify not in [True,1]:
2014+
# use value of sparsify as sentinal, unless it's an obvious
2015+
# "Truthey" value
2016+
if sparsify not in [True, 1]:
20152017
sentinal = sparsify
20162018
# little bit of a kludge job for #1217
20172019
result_levels = _sparsify(result_levels,
@@ -2138,7 +2140,8 @@ def __contains__(self, key):
21382140
def __reduce__(self):
21392141
"""Necessary for making this object picklable"""
21402142
object_state = list(np.ndarray.__reduce__(self))
2141-
subclass_state = (list(self.levels), list(self.labels), self.sortorder, list(self.names))
2143+
subclass_state = (list(self.levels), list(
2144+
self.labels), self.sortorder, list(self.names))
21422145
object_state[2] = (object_state[2], subclass_state)
21432146
return tuple(object_state)
21442147

@@ -2490,7 +2493,8 @@ def reindex(self, target, method=None, level=None, limit=None,
24902493
"with a method or limit")
24912494
return self[target], target
24922495

2493-
raise Exception("cannot handle a non-takeable non-unique multi-index!")
2496+
raise Exception(
2497+
"cannot handle a non-takeable non-unique multi-index!")
24942498

24952499
if not isinstance(target, MultiIndex):
24962500
if indexer is None:
@@ -2685,12 +2689,13 @@ def partial_selection(key):
26852689

26862690
# the key is completely specified, but we are using partial string matching here
26872691
# GH4758
2688-
can_index_exactly = any([ l.is_all_dates and not isinstance(k,compat.string_types) for k, l in zip(key, self.levels) ])
2689-
if any([ l.is_all_dates for k, l in zip(key, self.levels) ]) and not can_index_exactly:
2692+
can_index_exactly = any(
2693+
[l.is_all_dates and not isinstance(k, compat.string_types) for k, l in zip(key, self.levels)])
2694+
if any([l.is_all_dates for k, l in zip(key, self.levels)]) and not can_index_exactly:
26902695
indexer = slice(*self.slice_locs(key, key))
26912696

26922697
# we have a multiple selection here
2693-
if not indexer.stop-indexer.start == 1:
2698+
if not indexer.stop - indexer.start == 1:
26942699
return partial_selection(key)
26952700

26962701
key = tuple(self[indexer].tolist()[0])
@@ -2913,7 +2918,8 @@ def _assert_can_do_setop(self, other):
29132918

29142919
def astype(self, dtype):
29152920
if np.dtype(dtype) != np.object_:
2916-
raise TypeError("Setting %s dtype to anything other than object is not supported" % self.__class__)
2921+
raise TypeError(
2922+
"Setting %s dtype to anything other than object is not supported" % self.__class__)
29172923
return self._shallow_copy()
29182924

29192925
def insert(self, loc, item):
@@ -2935,7 +2941,8 @@ def insert(self, loc, item):
29352941
if not isinstance(item, tuple):
29362942
item = (item,) + ('',) * (self.nlevels - 1)
29372943
elif len(item) != self.nlevels:
2938-
raise ValueError('Item must have length equal to number of levels.')
2944+
raise ValueError(
2945+
'Item must have length equal to number of levels.')
29392946

29402947
new_levels = []
29412948
new_labels = []
@@ -2990,7 +2997,7 @@ def _wrap_joined_index(self, joined, other):
29902997

29912998
# For utility purposes
29922999

2993-
def _sparsify(label_list, start=0,sentinal=''):
3000+
def _sparsify(label_list, start=0, sentinal=''):
29943001
pivoted = lzip(*label_list)
29953002
k = len(label_list)
29963003

@@ -3031,7 +3038,7 @@ def _ensure_index(index_like, copy=False):
30313038
if isinstance(index_like, list):
30323039
if type(index_like) != list:
30333040
index_like = list(index_like)
3034-
# #2200 ?
3041+
# 2200 ?
30353042
converted, all_arrays = lib.clean_index_list(index_like)
30363043

30373044
if len(converted) > 0 and all_arrays:
@@ -3169,7 +3176,8 @@ def _get_consensus_names(indexes):
31693176

31703177
# find the non-none names, need to tupleify to make
31713178
# the set hashable, then reverse on return
3172-
consensus_names = set([ tuple(i.names) for i in indexes if all(n is not None for n in i.names) ])
3179+
consensus_names = set([tuple(i.names)
3180+
for i in indexes if all(n is not None for n in i.names)])
31733181
if len(consensus_names) == 1:
31743182
return list(list(consensus_names)[0])
31753183
return [None] * indexes[0].nlevels

pandas/core/internals.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -2334,8 +2334,12 @@ def copy(self, deep=True):
23342334
-------
23352335
copy : BlockManager
23362336
"""
2337-
new_axes = list(self.axes)
2338-
return self.apply('copy', axes=new_axes, deep=deep, do_integrity_check=False)
2337+
if deep:
2338+
new_axes = [ax.view() for ax in self.axes]
2339+
else:
2340+
new_axes = list(self.axes)
2341+
return self.apply('copy', axes=new_axes, deep=deep,
2342+
ref_items=new_axes[0], do_integrity_check=False)
23392343

23402344
def as_matrix(self, items=None):
23412345
if len(self.blocks) == 0:

pandas/sparse/panel.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -235,19 +235,25 @@ def __setstate__(self, state):
235235
self._minor_axis = _ensure_index(com._unpickle_array(minor))
236236
self._frames = frames
237237

238-
def copy(self):
238+
def copy(self, deep=True):
239239
"""
240-
Make a (shallow) copy of the sparse panel
240+
Make a copy of the sparse panel
241241
242242
Returns
243243
-------
244244
copy : SparsePanel
245245
"""
246-
return SparsePanel(self._frames.copy(), items=self.items,
247-
major_axis=self.major_axis,
248-
minor_axis=self.minor_axis,
249-
default_fill_value=self.default_fill_value,
250-
default_kind=self.default_kind)
246+
247+
d = self._construct_axes_dict()
248+
if deep:
249+
new_data = dict((k, v.copy(deep=True)) for k, v in compat.iteritems(self._frames))
250+
d = dict((k, v.copy(deep=True)) for k, v in compat.iteritems(d))
251+
else:
252+
new_data = self._frames.copy()
253+
d['default_fill_value']=self.default_fill_value
254+
d['default_kind']=self.default_kind
255+
256+
return SparsePanel(new_data, **d)
251257

252258
def to_frame(self, filter_observations=True):
253259
"""

0 commit comments

Comments
 (0)