Skip to content

Commit 9a4a6c4

Browse files
author
Nick Eubank
committed
Basic working implementation
1 parent 274abee commit 9a4a6c4

File tree

5 files changed

+107
-113
lines changed

5 files changed

+107
-113
lines changed

Diff for: pandas/core/frame.py

-9
Original file line numberDiff line numberDiff line change
@@ -2244,7 +2244,6 @@ def __setitem__(self, key, value):
22442244
self._set_item(key, value)
22452245

22462246
def _setitem_slice(self, key, value):
2247-
self._check_setitem_copy()
22482247
self.ix._setitem_with_indexer(key, value)
22492248

22502249
def _setitem_array(self, key, value):
@@ -2255,7 +2254,6 @@ def _setitem_array(self, key, value):
22552254
(len(key), len(self.index)))
22562255
key = check_bool_indexer(self.index, key)
22572256
indexer = key.nonzero()[0]
2258-
self._check_setitem_copy()
22592257
self.ix._setitem_with_indexer(indexer, value)
22602258
else:
22612259
if isinstance(value, DataFrame):
@@ -2265,7 +2263,6 @@ def _setitem_array(self, key, value):
22652263
self[k1] = value[k2]
22662264
else:
22672265
indexer = self.ix._convert_to_indexer(key, axis=1)
2268-
self._check_setitem_copy()
22692266
self.ix._setitem_with_indexer((slice(None), indexer), value)
22702267

22712268
def _setitem_frame(self, key, value):
@@ -2275,7 +2272,6 @@ def _setitem_frame(self, key, value):
22752272
raise TypeError('Must pass DataFrame with boolean values only')
22762273

22772274
self._check_inplace_setting(value)
2278-
self._check_setitem_copy()
22792275
self.where(-key, value, inplace=True)
22802276

22812277
def _ensure_valid_index(self, value):
@@ -2311,11 +2307,6 @@ def _set_item(self, key, value):
23112307
value = self._sanitize_column(key, value)
23122308
NDFrame._set_item(self, key, value)
23132309

2314-
# check if we are modifying a copy
2315-
# try to set first as we want an invalid
2316-
# value exception to occur first
2317-
if len(self):
2318-
self._check_setitem_copy()
23192310

23202311
def insert(self, loc, column, value, allow_duplicates=False):
23212312
"""

Diff for: pandas/core/generic.py

+24-99
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ class NDFrame(PandasObject):
8383
_internal_names = ['_data', '_cacher', '_item_cache', '_cache',
8484
'is_copy', '_subtyp', '_index',
8585
'_default_kind', '_default_fill_value', '_metadata',
86-
'__array_struct__', '__array_interface__']
86+
'__array_struct__', '__array_interface__', '_children']
8787
_internal_names_set = set(_internal_names)
8888
_accessors = frozenset([])
8989
_metadata = []
@@ -105,6 +105,8 @@ def __init__(self, data, axes=None, copy=False, dtype=None,
105105
object.__setattr__(self, 'is_copy', None)
106106
object.__setattr__(self, '_data', data)
107107
object.__setattr__(self, '_item_cache', {})
108+
object.__setattr__(self, '_children', [])
109+
108110

109111
def _validate_dtype(self, dtype):
110112
""" validate the passed dtype """
@@ -1174,9 +1176,6 @@ def _maybe_update_cacher(self, clear=False, verify_is_copy=True):
11741176
except:
11751177
pass
11761178

1177-
if verify_is_copy:
1178-
self._check_setitem_copy(stacklevel=5, t='referant')
1179-
11801179
if clear:
11811180
self._clear_item_cache()
11821181

@@ -1201,9 +1200,16 @@ def _slice(self, slobj, axis=0, kind=None):
12011200
# but only in a single-dtyped view sliceable case
12021201
is_copy = axis!=0 or result._is_view
12031202
result._set_is_copy(self, copy=is_copy)
1203+
1204+
self._children.append(weakref.ref(result))
1205+
12041206
return result
12051207

12061208
def _set_item(self, key, value):
1209+
1210+
# If children are views, reset to copies before setting.
1211+
self._convert_views_to_copies()
1212+
12071213
self._data.set(key, value)
12081214
self._clear_item_cache()
12091215

@@ -1216,103 +1222,21 @@ def _set_is_copy(self, ref=None, copy=True):
12161222
else:
12171223
self.is_copy = None
12181224

1219-
def _check_is_chained_assignment_possible(self):
1220-
"""
1221-
check if we are a view, have a cacher, and are of mixed type
1222-
if so, then force a setitem_copy check
1225+
def _convert_views_to_copies(self):
1226+
# Don't set on views.
1227+
if self._is_view:
1228+
self._data = self._data.copy()
12231229

1224-
should be called just near setting a value
1230+
# Before setting values, make sure children converted to copies.
1231+
for child in self._children:
12251232

1226-
will return a boolean if we are a view and are cached, but a single-dtype
1227-
meaning that the cacher should be updated following setting
1228-
"""
1229-
if self._is_view and self._is_cached:
1230-
ref = self._get_cacher()
1231-
if ref is not None and ref._is_mixed_type:
1232-
self._check_setitem_copy(stacklevel=4, t='referant', force=True)
1233-
return True
1234-
elif self.is_copy:
1235-
self._check_setitem_copy(stacklevel=4, t='referant')
1236-
return False
1237-
1238-
def _check_setitem_copy(self, stacklevel=4, t='setting', force=False):
1239-
"""
1240-
1241-
Parameters
1242-
----------
1243-
stacklevel : integer, default 4
1244-
the level to show of the stack when the error is output
1245-
t : string, the type of setting error
1246-
force : boolean, default False
1247-
if True, then force showing an error
1248-
1249-
validate if we are doing a setitem on a chained copy.
1250-
1251-
If you call this function, be sure to set the stacklevel such that the
1252-
user will see the error *at the level of setting*
1253-
1254-
It is technically possible to figure out that we are setting on
1255-
a copy even WITH a multi-dtyped pandas object. In other words, some blocks
1256-
may be views while others are not. Currently _is_view will ALWAYS return False
1257-
for multi-blocks to avoid having to handle this case.
1258-
1259-
df = DataFrame(np.arange(0,9), columns=['count'])
1260-
df['group'] = 'b'
1261-
1262-
# this technically need not raise SettingWithCopy if both are views (which is not
1263-
# generally guaranteed but is usually True)
1264-
# however, this is in general not a good practice and we recommend using .loc
1265-
df.iloc[0:5]['group'] = 'a'
1266-
1267-
"""
1268-
1269-
if force or self.is_copy:
1270-
1271-
value = config.get_option('mode.chained_assignment')
1272-
if value is None:
1273-
return
1274-
1275-
# see if the copy is not actually referred; if so, then dissolve
1276-
# the copy weakref
1277-
try:
1278-
gc.collect(2)
1279-
if not gc.get_referents(self.is_copy()):
1280-
self.is_copy = None
1281-
return
1282-
except:
1283-
pass
1284-
1285-
# we might be a false positive
1286-
try:
1287-
if self.is_copy().shape == self.shape:
1288-
self.is_copy = None
1289-
return
1290-
except:
1291-
pass
1292-
1293-
# a custom message
1294-
if isinstance(self.is_copy, string_types):
1295-
t = self.is_copy
1296-
1297-
elif t == 'referant':
1298-
t = ("\n"
1299-
"A value is trying to be set on a copy of a slice from a "
1300-
"DataFrame\n\n"
1301-
"See the caveats in the documentation: "
1302-
"http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")
1303-
1304-
else:
1305-
t = ("\n"
1306-
"A value is trying to be set on a copy of a slice from a "
1307-
"DataFrame.\n"
1308-
"Try using .loc[row_indexer,col_indexer] = value instead\n\n"
1309-
"See the caveats in the documentation: "
1310-
"http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy")
1311-
1312-
if value == 'raise':
1313-
raise SettingWithCopyError(t)
1314-
elif value == 'warn':
1315-
warnings.warn(t, SettingWithCopyWarning, stacklevel=stacklevel)
1233+
# Make sure children of children converted.
1234+
child()._convert_views_to_copies()
1235+
1236+
if child()._is_view:
1237+
child()._data = child()._data.copy()
1238+
1239+
self._children=[]
13161240

13171241
def __delitem__(self, key):
13181242
"""
@@ -2252,6 +2176,7 @@ def __setattr__(self, name, value):
22522176
# e.g. ``obj.x`` and ``obj.x = 4`` will always reference/modify
22532177
# the same attribute.
22542178

2179+
22552180
try:
22562181
object.__getattribute__(self, name)
22572182
return object.__setattr__(self, name, value)

Diff for: pandas/core/indexing.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,9 @@ def _get_setitem_indexer(self, key):
111111
raise IndexingError(key)
112112

113113
def __setitem__(self, key, value):
114+
# Make sure changes don't propagate to children
115+
self.obj._convert_views_to_copies()
116+
114117
indexer = self._get_setitem_indexer(key)
115118
self._setitem_with_indexer(indexer, value)
116119

@@ -199,6 +202,7 @@ def _has_valid_positional_setitem_indexer(self, indexer):
199202
def _setitem_with_indexer(self, indexer, value):
200203
self._has_valid_setitem_indexer(indexer)
201204

205+
202206
# also has the side effect of consolidating in-place
203207
from pandas import Panel, DataFrame, Series
204208
info_axis = self.obj._info_axis_number
@@ -508,8 +512,6 @@ def can_do_equal_len():
508512
if isinstance(value, ABCPanel):
509513
value = self._align_panel(indexer, value)
510514

511-
# check for chained assignment
512-
self.obj._check_is_chained_assignment_possible()
513515

514516
# actually do the set
515517
self.obj._consolidate_inplace()

Diff for: pandas/core/series.py

-3
Original file line numberDiff line numberDiff line change
@@ -714,10 +714,7 @@ def setitem(key, value):
714714
self._set_with(key, value)
715715

716716
# do the setitem
717-
cacher_needs_updating = self._check_is_chained_assignment_possible()
718717
setitem(key, value)
719-
if cacher_needs_updating:
720-
self._maybe_update_cacher()
721718

722719
def _set_with_engine(self, key, value):
723720
values = self._values

Diff for: pandas/tests/test_generic.py

+79
Original file line numberDiff line numberDiff line change
@@ -1681,6 +1681,85 @@ def test_set_attribute(self):
16811681
assert_equal(df.y, 5)
16821682
assert_series_equal(df['y'], Series([2, 4, 6], name='y'))
16831683

1684+
def test_copy_on_write(self):
1685+
1686+
#######
1687+
# FORWARD PROPAGATION TESTS
1688+
#######
1689+
1690+
##
1691+
# Test children recorded from various slicing methods
1692+
##
1693+
1694+
df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]})
1695+
self.assertTrue(len(df._children)==0)
1696+
1697+
1698+
views = dict()
1699+
1700+
views['loc'] = df.loc[0:0,]
1701+
views['iloc'] = df.iloc[0:1,]
1702+
views['ix'] = df.ix[0:0,]
1703+
views['loc_of_loc'] = views['loc'].loc[0:0,]
1704+
1705+
copies = dict()
1706+
for v in views.keys():
1707+
self.assertTrue(views[v]._is_view)
1708+
copies[v] = views[v].copy()
1709+
1710+
1711+
1712+
df.loc[0,'col1'] = -88
1713+
1714+
for v in views.keys():
1715+
tm.assert_frame_equal(views[v], copies[v])
1716+
self.assertFalse(views[v]._is_view)
1717+
1718+
##
1719+
# Test views become copies
1720+
# during different forms of value setting.
1721+
##
1722+
1723+
parent = dict()
1724+
views = dict()
1725+
copies = dict()
1726+
for v in ['loc', 'iloc', 'ix', 'column', 'attribute']:
1727+
parent[v] = pd.DataFrame({'col1':[1,2], 'col2':[3,4]})
1728+
views[v] = parent[v].loc[0:0,]
1729+
copies[v] = views[v].copy()
1730+
self.assertTrue( views[v]._is_view )
1731+
1732+
parent['loc'].loc[0, 'col1'] = -88
1733+
parent['iloc'].iloc[0, 0] = -88
1734+
parent['ix'].ix[0, 'col1'] = -88
1735+
parent['column']['col1'] = -88
1736+
parent['attribute'].col1 = -88
1737+
1738+
1739+
for v in views.keys():
1740+
tm.assert_frame_equal(views[v], copies[v])
1741+
self.assertFalse(views[v]._is_view)
1742+
1743+
1744+
1745+
########
1746+
# No Backward Propagation
1747+
#######
1748+
df = pd.DataFrame({'col1':[1,2], 'col2':[3,4]})
1749+
df_copy = df.copy()
1750+
1751+
views = dict()
1752+
1753+
views['loc'] = df.loc[0:0,]
1754+
views['iloc'] = df.iloc[0:1,]
1755+
views['ix'] = df.ix[0:0,]
1756+
views['loc_of_loc'] = views['loc'].loc[0:0,]
1757+
1758+
for v in views.keys():
1759+
views[v].loc[0:0,] = -99
1760+
1761+
tm.assert_frame_equal(df, df_copy)
1762+
16841763

16851764
class TestPanel(tm.TestCase, Generic):
16861765
_typ = Panel

0 commit comments

Comments
 (0)