Skip to content

Commit 1814085

Browse files
author
Nick Eubank
committed
make _is_view more conservative on multi-block objects
tweak so addition of new columnsdoesn't cause copying improved tests, removed copy option from merge
1 parent b41ef8f commit 1814085

13 files changed

+82
-318
lines changed

pandas/core/frame.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4315,12 +4315,12 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix='',
43154315
@Appender(_merge_doc, indents=2)
43164316
def merge(self, right, how='inner', on=None, left_on=None, right_on=None,
43174317
left_index=False, right_index=False, sort=False,
4318-
suffixes=('_x', '_y'), copy=True, indicator=False):
4318+
suffixes=('_x', '_y'), indicator=False):
43194319
from pandas.tools.merge import merge
43204320
return merge(self, right, how=how, on=on,
43214321
left_on=left_on, right_on=right_on,
43224322
left_index=left_index, right_index=right_index, sort=sort,
4323-
suffixes=suffixes, copy=copy, indicator=indicator)
4323+
suffixes=suffixes, indicator=indicator)
43244324

43254325
def round(self, decimals=0, out=None):
43264326
"""

pandas/core/generic.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -1089,9 +1089,6 @@ def get(self, key, default=None):
10891089
def __getitem__(self, item):
10901090
result = self._get_item_cache(item)
10911091

1092-
if isinstance(item, str):
1093-
result._is_column_view = True
1094-
10951092
return result
10961093

10971094
def _get_item_cache(self, item):
@@ -1216,10 +1213,14 @@ def _slice(self, slobj, axis=0, kind=None):
12161213
return result
12171214

12181215
def _set_item(self, key, value):
1219-
1220-
# If children are views, reset to copies before setting.
1221-
self._execute_copy_on_write()
12221216

1217+
if hasattr(self, 'columns'):
1218+
if key in self.columns:
1219+
# If children are views, reset to copies before setting.
1220+
self._execute_copy_on_write()
1221+
else:
1222+
self._execute_copy_on_write()
1223+
12231224
self._data.set(key, value)
12241225
self._clear_item_cache()
12251226

@@ -2300,6 +2301,9 @@ def __setattr__(self, name, value):
23002301
# e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
23012302
# the same attribute.
23022303

2304+
if hasattr(self, 'columns'):
2305+
if name in self.columns:
2306+
self._execute_copy_on_write()
23032307

23042308
try:
23052309
object.__getattribute__(self, name)

pandas/core/internals.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -2950,10 +2950,11 @@ def is_datelike_mixed_type(self):
29502950

29512951
@property
29522952
def is_view(self):
2953-
""" return a boolean if we are a single block and are a view """
2954-
if len(self.blocks) == 1:
2955-
return self.blocks[0].is_view
2956-
2953+
""" return a boolean True if any block is a view """
2954+
for b in self.blocks:
2955+
if b.is_view: return True
2956+
2957+
29572958
# It is technically possible to figure out which blocks are views
29582959
# e.g. [ b.values.base is not None for b in self.blocks ]
29592960
# but then we have the case of possibly some blocks being a view

pandas/core/panel.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1310,8 +1310,10 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
13101310
other = other.reindex(**{axis_name: axis_values})
13111311

13121312
for frame in axis_values:
1313-
self[frame].update(other[frame], join, overwrite, filter_func,
1313+
temp = self[frame]
1314+
temp.update(other[frame], join, overwrite, filter_func,
13141315
raise_conflict)
1316+
self[frame] = temp
13151317

13161318
def _get_join_index(self, other, how):
13171319
if how == 'left':

pandas/core/reshape.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -945,7 +945,7 @@ def melt_stub(df, stub, i, j):
945945

946946
for stub in stubnames[1:]:
947947
new = melt_stub(df, stub, id_vars, j)
948-
newdf = newdf.merge(new, how="outer", on=id_vars + [j], copy=False)
948+
newdf = newdf.merge(new, how="outer", on=id_vars + [j])
949949
return newdf.set_index([i, j])
950950

951951
def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,

pandas/tests/test_frame.py

+13-26
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,6 @@ def test_setitem_list(self):
193193

194194
assert_series_equal(self.frame['B'], data['A'], check_names=False)
195195
assert_series_equal(self.frame['A'], data['B'], check_names=False)
196-
197196
with assertRaisesRegexp(ValueError, 'Columns must be same length as key'):
198197
data[['A']] = self.frame[['A', 'B']]
199198
with assertRaisesRegexp(ValueError, 'Length of values does not match '
@@ -560,14 +559,14 @@ def test_setitem(self):
560559
self.frame['col8'] = 'foo'
561560
assert((self.frame['col8'] == 'foo').all())
562561

563-
# this is partially a view (e.g. some blocks are view)
564-
# so raise/warn
562+
# Changes should not propageate
565563
smaller = self.frame[:2]
566564
def f():
567-
smaller['col10'] = ['1', '2']
568-
self.assertRaises(com.SettingWithCopyError, f)
569-
self.assertEqual(smaller['col10'].dtype, np.object_)
570-
self.assertTrue((smaller['col10'] == ['1', '2']).all())
565+
smaller['col0'] = ['1', '2']
566+
f()
567+
self.assertEqual(smaller['col0'].dtype, np.object_)
568+
self.assertTrue((smaller['col0'] == ['1', '2']).all())
569+
self.assertNotEqual(self.frame[:2].col0.dtype, np.object_)
571570

572571
# with a dtype
573572
for dtype in ['int32','int64','float32','float64']:
@@ -1014,13 +1013,11 @@ def test_fancy_getitem_slice_mixed(self):
10141013
sliced = self.mixed_frame.ix[:, -3:]
10151014
self.assertEqual(sliced['D'].dtype, np.float64)
10161015

1017-
# get view with single block
1018-
# setting it triggers setting with copy
1016+
# Should never act as view due to copy on write
10191017
sliced = self.frame.ix[:, -3:]
10201018
def f():
1021-
sliced['C'] = 4.
1022-
self.assertRaises(com.SettingWithCopyError, f)
1023-
self.assertTrue((self.frame['C'] == 4).all())
1019+
sliced['C'] = 4
1020+
self.assertTrue((self.frame['C'] != 4).all())
10241021

10251022
def test_fancy_setitem_int_labels(self):
10261023
# integer index defers to label-based indexing
@@ -1819,14 +1816,12 @@ def test_irow(self):
18191816
expected = df.ix[8:14]
18201817
assert_frame_equal(result, expected)
18211818

1822-
# verify slice is view
1823-
# setting it makes it raise/warn
1819+
# verify changes on slices never propogate
18241820
def f():
18251821
result[2] = 0.
1826-
self.assertRaises(com.SettingWithCopyError, f)
18271822
exp_col = df[2].copy()
18281823
exp_col[4:8] = 0.
1829-
assert_series_equal(df[2], exp_col)
1824+
self.assertFalse((df[2] == exp_col).all())
18301825

18311826
# list of integers
18321827
result = df.iloc[[1, 2, 4, 6]]
@@ -1854,12 +1849,10 @@ def test_icol(self):
18541849
expected = df.ix[:, 8:14]
18551850
assert_frame_equal(result, expected)
18561851

1857-
# verify slice is view
1858-
# and that we are setting a copy
1852+
# Verify setting on view doesn't propogate
18591853
def f():
18601854
result[8] = 0.
1861-
self.assertRaises(com.SettingWithCopyError, f)
1862-
self.assertTrue((df[8] == 0).all())
1855+
self.assertTrue((df[8] != 0).all())
18631856

18641857
# list of integers
18651858
result = df.iloc[:, [1, 2, 4, 6]]
@@ -4335,12 +4328,6 @@ def test_constructor_with_datetime_tz(self):
43354328
assert_series_equal(df['D'],Series(idx,name='D'))
43364329
del df['D']
43374330

4338-
# assert that A & C are not sharing the same base (e.g. they
4339-
# are copies)
4340-
b1 = df._data.blocks[1]
4341-
b2 = df._data.blocks[2]
4342-
self.assertTrue(b1.values.equals(b2.values))
4343-
self.assertFalse(id(b1.values.values.base) == id(b2.values.values.base))
43444331

43454332
# with nan
43464333
df2 = df.copy()

pandas/tests/test_generic.py

+25-2
Original file line numberDiff line numberDiff line change
@@ -1740,7 +1740,6 @@ def test_copy_on_write(self):
17401740
copies[v] = views[v].copy()
17411741

17421742

1743-
17441743
df.loc[0,'col1'] = -88
17451744

17461745
for v in views.keys():
@@ -1807,6 +1806,23 @@ def test_copy_on_write(self):
18071806
df.loc[0, 'col1']=-88
18081807
self.assertTrue(v.loc[0] == -88)
18091808
self.assertTrue(v._is_view)
1809+
1810+
# Does NOT hold for multi-index (can't guarantee view behaviors --
1811+
# setting on multi-index creates new data somehow.)
1812+
index = pd.MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'],
1813+
['one', 'two', 'three']],
1814+
labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
1815+
[0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
1816+
names=['first', 'second'])
1817+
frame = pd.DataFrame(np.random.randn(10, 3), index=index,
1818+
columns=pd.Index(['A', 'B', 'C'], name='exp')).T
1819+
1820+
v = frame['foo','one']
1821+
self.assertTrue(v._is_view)
1822+
self.assertFalse(v._is_column_view)
1823+
frame.loc['A', ('foo','one')]=-88
1824+
self.assertFalse(v.loc['A'] == -88)
1825+
18101826

18111827
###
18121828
# Make sure that no problems if view created on view and middle-view
@@ -1827,7 +1843,14 @@ def test_copy_on_write(self):
18271843
tm.assert_frame_equal(v2, v2_copy)
18281844

18291845

1830-
1846+
def test_is_view_of_multiblocks(self):
1847+
# Ensure that if even if only one block of DF is view,
1848+
# returns _is_view = True.
1849+
df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]})
1850+
s = pd.Series([0.5, 0.3, 0.4])
1851+
df['col3'] = s[0:1]
1852+
self.assertTrue(df['col3']._is_view)
1853+
self.assertTrue(df._is_view)
18311854

18321855
class TestPanel(tm.TestCase, Generic):
18331856
_typ = Panel

0 commit comments

Comments
 (0)