Skip to content

Commit 7f7b1ae

Browse files
committed
BUG: Fix copy so that it always copies index/columns.
Only copies index/columns with `deep=True` on `BlockManager`. Plus some tests...yay! Change copy to make views of indices. Requires changing groupby to check whether indices are identical, rather than comparing with `is`. Plus add tests for name checks through everything. Also, fixes tests to use `is_()` rather than `is`. CLN: Change groupby to use `is_()` instead of `is`.
1 parent 54349d1 commit 7f7b1ae

File tree

10 files changed

+47
-35
lines changed

10 files changed

+47
-35
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,8 @@ Bug Fixes
455455
- Tests for fillna on empty Series (:issue:`4346`), thanks @immerrr
456456
- Fixed a bug where ``ValueError`` wasn't correctly raised when column names
457457
weren't strings (:issue:`4956`)
458+
- Fixed ``copy()`` to shallow copy axes/indices as well and thereby keep
459+
separate metadata. (:issue:`4202`, :issue:`4830`)
458460

459461
pandas 0.12.0
460462
-------------

pandas/core/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1988,7 +1988,7 @@ def transform(self, func, *args, **kwargs):
19881988

19891989
# broadcasting
19901990
if isinstance(res, Series):
1991-
if res.index is obj.index:
1991+
if res.index.is_(obj.index):
19921992
group.T.values[:] = res
19931993
else:
19941994
group.values[:] = res

pandas/core/internals.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -2334,8 +2334,12 @@ def copy(self, deep=True):
23342334
-------
23352335
copy : BlockManager
23362336
"""
2337-
new_axes = list(self.axes)
2338-
return self.apply('copy', axes=new_axes, deep=deep, do_integrity_check=False)
2337+
if deep:
2338+
new_axes = [ax.view() for ax in self.axes]
2339+
else:
2340+
new_axes = list(self.axes)
2341+
return self.apply('copy', axes=new_axes, deep=deep,
2342+
ref_items=new_axes[0], do_integrity_check=False)
23392343

23402344
def as_matrix(self, items=None):
23412345
if len(self.blocks) == 0:

pandas/sparse/series.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def __init__(self, data, index=None, sparse_index=None, kind='block',
116116

117117
if is_sparse_array:
118118
if isinstance(data, SparseSeries) and index is None:
119-
index = data.index
119+
index = data.index.view()
120120
elif index is not None:
121121
assert(len(index) == len(data))
122122

@@ -125,14 +125,14 @@ def __init__(self, data, index=None, sparse_index=None, kind='block',
125125

126126
elif isinstance(data, SparseSeries):
127127
if index is None:
128-
index = data.index
128+
index = data.index.view()
129129

130130
# extract the SingleBlockManager
131131
data = data._data
132132

133133
elif isinstance(data, (Series, dict)):
134134
if index is None:
135-
index = data.index
135+
index = data.index.view()
136136

137137
data = Series(data)
138138
data, sparse_index = make_sparse(data, kind=kind,
@@ -150,7 +150,7 @@ def __init__(self, data, index=None, sparse_index=None, kind='block',
150150
if dtype is not None:
151151
data = data.astype(dtype)
152152
if index is None:
153-
index = data.index
153+
index = data.index.view()
154154
else:
155155
data = data.reindex(index, copy=False)
156156

pandas/sparse/tests/test_sparse.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -787,7 +787,7 @@ def test_copy(self):
787787
cp = self.frame.copy()
788788
tm.assert_isinstance(cp, SparseDataFrame)
789789
assert_sp_frame_equal(cp, self.frame)
790-
self.assert_(cp.index is self.frame.index)
790+
self.assert_(cp.index.is_(self.frame.index))
791791

792792
def test_constructor(self):
793793
for col, series in compat.iteritems(self.frame):

pandas/tests/test_frame.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -1736,6 +1736,16 @@ class SafeForSparse(object):
17361736

17371737
_multiprocess_can_split_ = True
17381738

1739+
def test_copy_index_name_checking(self):
1740+
# don't want to be able to modify the index stored elsewhere after
1741+
# making a copy
1742+
for attr in ('index', 'columns'):
1743+
ind = getattr(self.frame, attr)
1744+
ind.name = None
1745+
cp = self.frame.copy()
1746+
getattr(cp, attr).name = 'foo'
1747+
self.assert_(getattr(self.frame, attr).name is None)
1748+
17391749
def test_getitem_pop_assign_name(self):
17401750
s = self.frame['A']
17411751
self.assertEqual(s.name, 'A')
@@ -6040,16 +6050,6 @@ def test_copy(self):
60406050
copy = self.mixed_frame.copy()
60416051
self.assert_(copy._data is not self.mixed_frame._data)
60426052

6043-
# def test_copy_index_name_checking(self):
6044-
# # don't want to be able to modify the index stored elsewhere after
6045-
# # making a copy
6046-
6047-
# self.frame.columns.name = None
6048-
# cp = self.frame.copy()
6049-
# cp.columns.name = 'foo'
6050-
6051-
# self.assert_(self.frame.columns.name is None)
6052-
60536053
def _check_method(self, method='pearson', check_minp=False):
60546054
if not check_minp:
60556055
correls = self.frame.corr(method=method)
@@ -7630,8 +7630,8 @@ def test_reindex(self):
76307630

76317631
# corner cases
76327632

7633-
# Same index, copies values
7634-
newFrame = self.frame.reindex(self.frame.index)
7633+
# Same index, copies values but not index if copy=False
7634+
newFrame = self.frame.reindex(self.frame.index, copy=False)
76357635
self.assert_(newFrame.index is self.frame.index)
76367636

76377637
# length zero

pandas/tests/test_multilevel.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1195,11 +1195,11 @@ def test_count(self):
11951195

11961196
result = frame.count(level='b')
11971197
expect = self.frame.count(level=1)
1198-
assert_frame_equal(result, expect)
1198+
assert_frame_equal(result, expect, check_names=False)
11991199

12001200
result = frame.count(level='a')
12011201
expect = self.frame.count(level=0)
1202-
assert_frame_equal(result, expect)
1202+
assert_frame_equal(result, expect, check_names=False)
12031203

12041204
series = self.series.copy()
12051205
series.index.names = ['a', 'b']

pandas/tests/test_panel.py

+7
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,13 @@ class SafeForLongAndSparse(object):
6161
def test_repr(self):
6262
foo = repr(self.panel)
6363

64+
def test_copy_names(self):
65+
for attr in ('major_axis', 'minor_axis'):
66+
getattr(self.panel, attr).name = None
67+
cp = self.panel.copy()
68+
getattr(cp, attr).name = 'foo'
69+
self.assert_(getattr(self.panel, attr).name is None)
70+
6471
def test_iter(self):
6572
tm.equalContents(list(self.panel), self.panel.items)
6673

pandas/tests/test_panel4d.py

-5
Original file line numberDiff line numberDiff line change
@@ -762,11 +762,6 @@ def test_reindex(self):
762762
major=self.panel4d.major_axis,
763763
minor=self.panel4d.minor_axis)
764764

765-
assert(result.labels is self.panel4d.labels)
766-
assert(result.items is self.panel4d.items)
767-
assert(result.major_axis is self.panel4d.major_axis)
768-
assert(result.minor_axis is self.panel4d.minor_axis)
769-
770765
# don't necessarily copy
771766
result = self.panel4d.reindex()
772767
assert_panel4d_equal(result,self.panel4d)

pandas/tests/test_series.py

+12-8
Original file line numberDiff line numberDiff line change
@@ -64,14 +64,17 @@ def test_copy_name(self):
6464
result = self.ts.copy()
6565
self.assertEquals(result.name, self.ts.name)
6666

67-
# def test_copy_index_name_checking(self):
68-
# don't want to be able to modify the index stored elsewhere after
69-
# making a copy
67+
def test_copy_index_name_checking(self):
68+
# don't want to be able to modify the index stored elsewhere after
69+
# making a copy
7070

71-
# self.ts.index.name = None
72-
# cp = self.ts.copy()
73-
# cp.index.name = 'foo'
74-
# self.assert_(self.ts.index.name is None)
71+
self.ts.index.name = None
72+
self.assert_(self.ts.index.name is None)
73+
self.assert_(self.ts is self.ts)
74+
cp = self.ts.copy()
75+
cp.index.name = 'foo'
76+
print(self.ts.index.name)
77+
self.assert_(self.ts.index.name is None)
7578

7679
def test_append_preserve_name(self):
7780
result = self.ts[:5].append(self.ts[5:])
@@ -4270,7 +4273,8 @@ def test_align_sameindex(self):
42704273

42714274
def test_reindex(self):
42724275
identity = self.series.reindex(self.series.index)
4273-
self.assertEqual(id(self.series.index), id(identity.index))
4276+
self.assert_(np.may_share_memory(self.series.index, identity.index))
4277+
self.assert_(identity.index.is_(self.series.index))
42744278

42754279
subIndex = self.series.index[10:20]
42764280
subSeries = self.series.reindex(subIndex)

0 commit comments

Comments
 (0)