Skip to content

Commit f8cc267

Browse files
committed
BUG/ENH: fix stale reference caused by block modification described in #473, add block splitting logic per #158
1 parent 1eb1e71 commit f8cc267

File tree

5 files changed

+90
-6
lines changed

5 files changed

+90
-6
lines changed

RELEASE.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ pandas 0.6.1
7777
rolling_corr/rolling_cov (GH #462)
7878
- Fix performance regression in cross-sectional count in DataFrame,
7979
affecting DataFrame.dropna speed
80+
- Column deletion in DataFrame copies no data (computes views on blocks) (GH
81+
#158)
8082

8183
**Bug fixes**
8284

@@ -100,6 +102,9 @@ pandas 0.6.1
100102
output type, handle tuple return values and other things that were breaking
101103
(GH #465)
102104
- Handle floating point index values in HDFStore (GH #454)
105+
- Fixed stale column reference bug (cached Series object) caused by type
106+
change / item deletion in DataFrame (GH #473)
107+
103108

104109
Thanks
105110
------

bench/bench_dense_to_sparse.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Builds K SparseSeries of decreasing length from mostly-NaN dense arrays.
# Each name is imported explicitly so its origin is visible (a wildcard import
# previously supplied all of them, including ``np``).
import numpy as np

from pandas import DateRange, SparseSeries, datetools

K = 100      # number of series to build
N = 100000   # length of the full minute-frequency range

rng = DateRange('1/1/2000', periods=N, offset=datetools.Minute())

# Same range, converted to datetime64 and then to plain int64 values.
rng2 = np.asarray(rng).astype('M8').astype('i8')

series = {}
for i in range(1, K + 1):
    # Drop the last ``i`` points so that every series has a different length.
    data = np.random.randn(N)[:-i]
    this_rng = rng2[:-i]
    # Keep only the first 100 observations; everything after them is missing.
    data[100:] = np.nan
    series[i] = SparseSeries(data, index=this_rng)
15+

pandas/core/internals.py

Lines changed: 41 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,40 @@ def delete(self, item):
160160
new_values = np.delete(self.values, loc, 0)
161161
return make_block(new_values, new_items, self.ref_items)
162162

163+
def split_block_at(self, item):
    """
    Carve this block into the pieces lying on either side of one column.

    The column is "deleted" without copying the data: each returned block
    is built from a slice of ``self.values`` (a view on the original
    array); only the item labels are copied.

    Parameters
    ----------
    item : member of ``self.items``
        Column to leave out; located with ``self.items.get_loc``

    Returns
    -------
    leftb, rightb : (Block or None, Block or None)
        Blocks holding the items before and after ``item``. A side with
        nothing left on it is returned as None.
    """
    position = self.items.get_loc(item)

    # sole item in the block: nothing survives on either side
    if len(self.items) == 1:
        return None, None

    # first item: only a right-hand remainder
    if position == 0:
        trailing = make_block(self.values[1:], self.items[1:].copy(),
                              self.ref_items)
        return None, trailing

    # last item: only a left-hand remainder
    if position == len(self.values) - 1:
        leading = make_block(self.values[:-1], self.items[:-1].copy(),
                             self.ref_items)
        return leading, None

    # interior item: a remainder on each side
    leading = make_block(self.values[:position],
                         self.items[:position].copy(), self.ref_items)
    trailing = make_block(self.values[position + 1:],
                          self.items[position + 1:].copy(), self.ref_items)
    return leading, trailing
196+
163197
def fillna(self, value):
164198
new_values = self.values.copy()
165199
mask = common.isnull(new_values.ravel())
@@ -573,13 +607,14 @@ def _delete_from_block(self, i, item):
573607
"""
574608
Delete and maybe remove the whole block
575609
"""
576-
block = self.blocks[i]
577-
newb = block.delete(item)
610+
block = self.blocks.pop(i)
611+
new_left, new_right = block.split_block_at(item)
578612

579-
if len(newb.ref_locs) == 0:
580-
self.blocks.pop(i)
581-
else:
582-
self.blocks[i] = newb
613+
if new_left is not None:
614+
self.blocks.append(new_left)
615+
616+
if new_right is not None:
617+
self.blocks.append(new_right)
583618

584619
def _add_new_block(self, item, value):
585620
# Do we care about dtype at the moment?

pandas/tests/test_frame.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3879,6 +3879,17 @@ def test_idxmax(self):
38793879

38803880
self.assertRaises(Exception, frame.idxmax, axis=2)
38813881

3882+
def test_stale_cached_series_bug_473(self):
    """
    Regression test for GH #473: a cached column Series must not go stale
    after another column changes type.
    """
    Y = DataFrame(np.random.random((4, 4)), index=('a', 'b', 'c', 'd'),
                  columns=('e', 'f', 'g', 'h'))
    repr(Y)

    # Changing the type of one column modifies the frame's blocks.
    Y['e'] = Y['e'].astype('object')

    # Deliberately chained: the write goes through the Series returned by
    # Y['g'] and has to be visible on the frame afterwards.
    Y['g']['c'] = np.nan
    repr(Y)

    # Called only to check that the reductions still run; the results are
    # not compared.
    Y.sum()
    Y['g'].sum()

    self.assertTrue(isnull(Y['g']['c']))
3892+
38823893
class TestDataFrameJoin(unittest.TestCase):
38833894

38843895
def setUp(self):

pandas/tests/test_internals.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,24 @@ def test_delete(self):
138138

139139
self.assertRaises(Exception, self.fblock.delete, 'b')
140140

141+
def test_split_block_at(self):
    """
    split_block_at returns the blocks on either side of the removed item,
    with None for a side that has no items left.
    """
    # first item: nothing remains on the left
    left, right = self.fblock.split_block_at('a')
    self.assertTrue(left is None)
    self.assertTrue(np.array_equal(right.items, ['c', 'e']))

    # middle item: one block on each side
    left, right = self.fblock.split_block_at('c')
    self.assertTrue(np.array_equal(left.items, ['a']))
    self.assertTrue(np.array_equal(right.items, ['e']))

    # last item: nothing remains on the right
    left, right = self.fblock.split_block_at('e')
    self.assertTrue(np.array_equal(left.items, ['a', 'c']))
    self.assertTrue(right is None)

    # single-item block: nothing remains at all
    bblock = get_bool_ex(['f'])
    left, right = bblock.split_block_at('f')
    self.assertTrue(left is None)
    self.assertTrue(right is None)
158+
141159
def test_get(self):
142160
pass
143161

0 commit comments

Comments
 (0)