Skip to content

Commit 89e6092

Browse files
committed
Merge pull request #8520 from jreback/indexing
BUG: Bug in inplace operations with column sub-selections on the lhs (GH8511)
2 parents a05b8ed + 9e0122a commit 89e6092

File tree

7 files changed

+201
-19
lines changed

7 files changed

+201
-19
lines changed

doc/source/v0.15.0.txt

+40-1
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,46 @@ API changes
272272
df
273273
df.dtypes
274274

275+
- In prior versions, updating a pandas object inplace would not reflect in other python references to this object. (:issue:`8511`,:issue:`5104`)
276+
277+
.. ipython:: python
278+
279+
s = Series([1, 2, 3])
280+
s2 = s
281+
s += 1.5
282+
283+
Behavior prior to v0.15.0
284+
285+
.. code-block:: python
286+
287+
288+
# the original object
289+
In [5]: s
290+
Out[5]:
291+
0 2.5
292+
1 3.5
293+
2 4.5
294+
dtype: float64
295+
296+
297+
# a reference to the original object
298+
In [7]: s2
299+
Out[7]:
300+
0 1
301+
1 2
302+
2 3
303+
dtype: int64
304+
305+
This is now the correct behavior
306+
307+
.. ipython:: python
308+
309+
# the original object
310+
s
311+
312+
# a reference to the original object
313+
s2
314+
275315
- ``Series.to_csv()`` now returns a string when ``path=None``, matching the behaviour of ``DataFrame.to_csv()`` (:issue:`8215`).
276316

277317
- ``read_hdf`` now raises ``IOError`` when a file that doesn't exist is passed in. Previously, a new, empty file was created, and a ``KeyError`` raised (:issue:`7715`).
@@ -954,7 +994,6 @@ Bug Fixes
954994
- Bug in ``DataFrame.reset_index`` which has ``MultiIndex`` contains ``PeriodIndex`` or ``DatetimeIndex`` with tz raises ``ValueError`` (:issue:`7746`, :issue:`7793`)
955995

956996

957-
958997
- Bug in ``DataFrame.plot`` with ``subplots=True`` may draw unnecessary minor xticks and yticks (:issue:`7801`)
959998
- Bug in ``StataReader`` which did not read variable labels in 117 files due to difference between Stata documentation and implementation (:issue:`7816`)
960999
- Bug in ``StataReader`` where strings were always converted to 244 characters-fixed width irrespective of underlying string size (:issue:`7858`)

pandas/core/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -538,6 +538,6 @@ def duplicated(self, take_last=False):
538538
#----------------------------------------------------------------------
539539
# abstracts
540540

541-
def _update_inplace(self, result):
541+
def _update_inplace(self, result, **kwargs):
542542
raise NotImplementedError
543543

pandas/core/generic.py

+29-8
Original file line numberDiff line numberDiff line change
@@ -1108,9 +1108,21 @@ def _is_view(self):
11081108
""" boolean : return if I am a view of another array """
11091109
return self._data.is_view
11101110

1111-
def _maybe_update_cacher(self, clear=False):
1112-
""" see if we need to update our parent cacher
1113-
if clear, then clear our cache """
1111+
def _maybe_update_cacher(self, clear=False, verify_is_copy=True):
1112+
"""
1113+
1114+
see if we need to update our parent cacher
1115+
if clear, then clear our cache
1116+
1117+
Parameters
1118+
----------
1119+
clear : boolean, default False
1120+
clear the item cache
1121+
verify_is_copy : boolean, default True
1122+
provide is_copy checks
1123+
1124+
"""
1125+
11141126
cacher = getattr(self, '_cacher', None)
11151127
if cacher is not None:
11161128
ref = cacher[1]()
@@ -1125,8 +1137,8 @@ def _maybe_update_cacher(self, clear=False):
11251137
except:
11261138
pass
11271139

1128-
# check if we are a copy
1129-
self._check_setitem_copy(stacklevel=5, t='referant')
1140+
if verify_is_copy:
1141+
self._check_setitem_copy(stacklevel=5, t='referant')
11301142

11311143
if clear:
11321144
self._clear_item_cache()
@@ -1564,14 +1576,23 @@ def drop(self, labels, axis=0, level=None, inplace=False, **kwargs):
15641576
else:
15651577
return result
15661578

1567-
def _update_inplace(self, result):
1568-
"replace self internals with result."
1579+
def _update_inplace(self, result, verify_is_copy=True):
1580+
"""
1581+
replace self internals with result.
1582+
1583+
Parameters
1584+
----------
1585+
verify_is_copy : boolean, default True
1586+
provide is_copy checks
1587+
1588+
"""
15691589
# NOTE: This does *not* call __finalize__ and that's an explicit
15701590
# decision that we may revisit in the future.
1591+
15711592
self._reset_cache()
15721593
self._clear_item_cache()
15731594
self._data = getattr(result,'_data',result)
1574-
self._maybe_update_cacher()
1595+
self._maybe_update_cacher(verify_is_copy=verify_is_copy)
15751596

15761597
def add_prefix(self, prefix):
15771598
"""

pandas/core/index.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ def _simple_new(cls, values, name=None, **kwargs):
220220
result._reset_identity()
221221
return result
222222

223-
def _update_inplace(self, result):
223+
def _update_inplace(self, result, **kwargs):
224224
# guard when called from IndexOpsMixin
225225
raise TypeError("Index can't be updated inplace")
226226

pandas/core/ops.py

+24-5
Original file line numberDiff line numberDiff line change
@@ -161,20 +161,39 @@ def add_special_arithmetic_methods(cls, arith_method=None, radd_func=None,
161161
if passed, will not set functions with names in exclude
162162
"""
163163
radd_func = radd_func or operator.add
164+
164165
# in frame, special methods have default_axis = None, comp methods use
165166
# 'columns'
167+
166168
new_methods = _create_methods(arith_method, radd_func, comp_method,
167169
bool_method, use_numexpr, default_axis=None,
168170
special=True)
169171

170172
# inplace operators (I feel like these should get passed an `inplace=True`
171173
# or just be removed
174+
175+
def _wrap_inplace_method(method):
176+
"""
177+
return an inplace wrapper for this method
178+
"""
179+
180+
def f(self, other):
181+
result = method(self, other)
182+
183+
# this makes sure that we are aligned like the input
184+
# we are updating inplace so we want to ignore is_copy
185+
self._update_inplace(result.reindex_like(self,copy=False)._data,
186+
verify_is_copy=False)
187+
188+
return self
189+
return f
190+
172191
new_methods.update(dict(
173-
__iadd__=new_methods["__add__"],
174-
__isub__=new_methods["__sub__"],
175-
__imul__=new_methods["__mul__"],
176-
__itruediv__=new_methods["__truediv__"],
177-
__ipow__=new_methods["__pow__"]
192+
__iadd__=_wrap_inplace_method(new_methods["__add__"]),
193+
__isub__=_wrap_inplace_method(new_methods["__sub__"]),
194+
__imul__=_wrap_inplace_method(new_methods["__mul__"]),
195+
__itruediv__=_wrap_inplace_method(new_methods["__truediv__"]),
196+
__ipow__=_wrap_inplace_method(new_methods["__pow__"]),
178197
))
179198
if not compat.PY3:
180199
new_methods["__idiv__"] = new_methods["__div__"]

pandas/core/series.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -267,8 +267,9 @@ def _set_subtyp(self, is_all_dates):
267267
else:
268268
object.__setattr__(self, '_subtyp', 'series')
269269

270-
def _update_inplace(self, result):
271-
return generic.NDFrame._update_inplace(self, result)
270+
def _update_inplace(self, result, **kwargs):
271+
# we want to call the generic version and not the IndexOpsMixin
272+
return generic.NDFrame._update_inplace(self, result, **kwargs)
272273

273274
# ndarray compatibility
274275
@property

pandas/tests/test_frame.py

+103-1
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,109 @@ def test_setitem_mulit_index(self):
249249
df[('joe', 'last')] = df[('jolie', 'first')].loc[i, j]
250250
assert_frame_equal(df[('joe', 'last')], df[('jolie', 'first')])
251251

252+
def test_inplace_ops_alignment(self):
253+
254+
# inplace ops / ops alignment
255+
# GH 8511
256+
257+
columns = list('abcdefg')
258+
X_orig = DataFrame(np.arange(10*len(columns)).reshape(-1,len(columns)), columns=columns, index=range(10))
259+
Z = 100*X_orig.iloc[:,1:-1].copy()
260+
block1 = list('bedcf')
261+
subs = list('bcdef')
262+
263+
# add
264+
X = X_orig.copy()
265+
result1 = (X[block1] + Z).reindex(columns=subs)
266+
267+
X[block1] += Z
268+
result2 = X.reindex(columns=subs)
269+
270+
X = X_orig.copy()
271+
result3 = (X[block1] + Z[block1]).reindex(columns=subs)
272+
273+
X[block1] += Z[block1]
274+
result4 = X.reindex(columns=subs)
275+
276+
assert_frame_equal(result1, result2)
277+
assert_frame_equal(result1, result3)
278+
assert_frame_equal(result1, result4)
279+
280+
# sub
281+
X = X_orig.copy()
282+
result1 = (X[block1] - Z).reindex(columns=subs)
283+
284+
X[block1] -= Z
285+
result2 = X.reindex(columns=subs)
286+
287+
X = X_orig.copy()
288+
result3 = (X[block1] - Z[block1]).reindex(columns=subs)
289+
290+
X[block1] -= Z[block1]
291+
result4 = X.reindex(columns=subs)
292+
293+
assert_frame_equal(result1, result2)
294+
assert_frame_equal(result1, result3)
295+
assert_frame_equal(result1, result4)
296+
297+
def test_inplace_ops_identity(self):
298+
299+
# GH 5104
300+
# make sure that we are actually changing the object
301+
s_orig = Series([1, 2, 3])
302+
df_orig = DataFrame(np.random.randint(0,5,size=10).reshape(-1,5))
303+
304+
# no dtype change
305+
s = s_orig.copy()
306+
s2 = s
307+
s += 1
308+
assert_series_equal(s,s2)
309+
assert_series_equal(s_orig+1,s)
310+
self.assertIs(s,s2)
311+
self.assertIs(s._data,s2._data)
312+
313+
df = df_orig.copy()
314+
df2 = df
315+
df += 1
316+
assert_frame_equal(df,df2)
317+
assert_frame_equal(df_orig+1,df)
318+
self.assertIs(df,df2)
319+
self.assertIs(df._data,df2._data)
320+
321+
# dtype change
322+
s = s_orig.copy()
323+
s2 = s
324+
s += 1.5
325+
assert_series_equal(s,s2)
326+
assert_series_equal(s_orig+1.5,s)
327+
328+
df = df_orig.copy()
329+
df2 = df
330+
df += 1.5
331+
assert_frame_equal(df,df2)
332+
assert_frame_equal(df_orig+1.5,df)
333+
self.assertIs(df,df2)
334+
self.assertIs(df._data,df2._data)
335+
336+
# mixed dtype
337+
arr = np.random.randint(0,10,size=5)
338+
df_orig = DataFrame({'A' : arr.copy(), 'B' : 'foo'})
339+
df = df_orig.copy()
340+
df2 = df
341+
df['A'] += 1
342+
expected = DataFrame({'A' : arr.copy()+1, 'B' : 'foo'})
343+
assert_frame_equal(df,expected)
344+
assert_frame_equal(df2,expected)
345+
self.assertIs(df._data,df2._data)
346+
347+
df = df_orig.copy()
348+
df2 = df
349+
df['A'] += 1.5
350+
expected = DataFrame({'A' : arr.copy()+1.5, 'B' : 'foo'})
351+
assert_frame_equal(df,expected)
352+
assert_frame_equal(df2,expected)
353+
self.assertIs(df._data,df2._data)
354+
252355
def test_getitem_boolean(self):
253356
# boolean indexing
254357
d = self.tsframe.index[10]
@@ -4979,7 +5082,6 @@ def test_div(self):
49795082
self.assertFalse(np.array_equal(res.fillna(0), res2.fillna(0)))
49805083

49815084
def test_logical_operators(self):
4982-
import operator
49835085

49845086
def _check_bin_op(op):
49855087
result = op(df1, df2)

0 commit comments

Comments
 (0)