Skip to content

Commit be66773

Browse files
committed
Merge branch 'resource-handling'
Should fix #22
2 parents e0b0bec + f071ffd commit be66773

File tree

8 files changed

+65
-53
lines changed

8 files changed

+65
-53
lines changed

doc/source/changes.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
Changelog
33
#########
44

5+
**********
6+
v0.9.0
7+
**********
8+
- Fixed issue with resources never being freed as mmaps were never closed.
9+
- Client counting is now done manually, instead of relying on pyton's reference count
10+
511
**********
612
v0.8.5
713
**********

smmap/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
__author__ = "Sebastian Thiel"
44
__contact__ = "[email protected]"
55
__homepage__ = "https://github.com/Byron/smmap"
6-
version_info = (0, 8, 5)
6+
version_info = (0, 9, 0)
77
__version__ = '.'.join(str(i) for i in version_info)
88

99
# make everything available in root package for convenience

smmap/buf.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def __getslice__(self, i, j):
9393
l -= len(d)
9494
# This is slower than the join ... but what can we do ...
9595
out += d
96+
del(d)
9697
# END while there are bytes to read
9798
return out
9899
else:
@@ -103,6 +104,10 @@ def __getslice__(self, i, j):
103104
d = c.buffer()[:l]
104105
ofs += len(d)
105106
l -= len(d)
107+
# Make sure we don't keep references, as c.use_region() might attempt to free resources, but
108+
# can't unless we use pure bytes
109+
if hasattr(d, 'tobytes'):
110+
d = d.tobytes()
106111
md.append(d)
107112
# END while there are bytes to read
108113
return bytes().join(md)

smmap/mman.py

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
buffer,
99
)
1010

11-
from weakref import ref
1211
import sys
1312
from functools import reduce
1413

@@ -55,12 +54,11 @@ def _destroy(self):
5554
# Actual client count, which doesn't include the reference kept by the manager, nor ours
5655
# as we are about to be deleted
5756
try:
58-
num_clients = self._rlist.client_count() - 2
59-
if num_clients == 0 and len(self._rlist) == 0:
57+
if len(self._rlist) == 0:
6058
# Free all resources associated with the mapped file
6159
self._manager._fdict.pop(self._rlist.path_or_fd())
6260
# END remove regions list from manager
63-
except TypeError:
61+
except (TypeError, KeyError):
6462
# sometimes, during shutdown, getrefcount is None. Its possible
6563
# to re-import it, however, its probably better to just ignore
6664
# this python problem (for now).
@@ -72,13 +70,16 @@ def _destroy(self):
7270
def _copy_from(self, rhs):
7371
"""Copy all data from rhs into this instance, handles usage count"""
7472
self._manager = rhs._manager
75-
self._rlist = rhs._rlist
73+
self._rlist = type(rhs._rlist)(rhs._rlist)
7674
self._region = rhs._region
7775
self._ofs = rhs._ofs
7876
self._size = rhs._size
7977

78+
for region in self._rlist:
79+
region.increment_client_count()
80+
8081
if self._region is not None:
81-
self._region.increment_usage_count()
82+
self._region.increment_client_count()
8283
# END handle regions
8384

8485
def __copy__(self):
@@ -126,20 +127,22 @@ def use_region(self, offset=0, size=0, flags=0):
126127

127128
if need_region:
128129
self._region = man._obtain_region(self._rlist, offset, size, flags, False)
130+
self._region.increment_client_count()
129131
# END need region handling
130132

131-
self._region.increment_usage_count()
132133
self._ofs = offset - self._region._b
133134
self._size = min(size, self._region.ofs_end() - offset)
134135

135136
return self
136137

137138
def unuse_region(self):
138-
"""Unuse the ucrrent region. Does nothing if we have no current region
139+
"""Unuse the current region. Does nothing if we have no current region
139140
140141
**Note:** the cursor unuses the region automatically upon destruction. It is recommended
141142
to un-use the region once you are done reading from it in persistent cursors as it
142143
helps to free up resource more quickly"""
144+
if self._region is not None:
145+
self._region.increment_client_count(-1)
143146
self._region = None
144147
# note: should reset ofs and size, but we spare that for performance. Its not
145148
# allowed to query information if we are not valid !
@@ -184,12 +187,10 @@ def size(self):
184187
""":return: amount of bytes we point to"""
185188
return self._size
186189

187-
def region_ref(self):
188-
""":return: weak ref to our mapped region.
190+
def region(self):
191+
""":return: our mapped region, or None if nothing is mapped yet
189192
:raise AssertionError: if we have no current region. This is only useful for debugging"""
190-
if self._region is None:
191-
raise AssertionError("region not set")
192-
return ref(self._region)
193+
return self._region
193194

194195
def includes_ofs(self, ofs):
195196
""":return: True if the given absolute offset is contained in the cursors
@@ -311,8 +312,8 @@ def _collect_lru_region(self, size):
311312
lru_list = None
312313
for regions in self._fdict.values():
313314
for region in regions:
314-
# check client count - consider that we keep one reference ourselves !
315-
if (region.client_count() - 2 == 0 and
315+
# check client count - if it's 1, it's just us
316+
if (region.client_count() == 1 and
316317
(lru_region is None or region._uc < lru_region._uc)):
317318
lru_region = region
318319
lru_list = regions
@@ -326,6 +327,7 @@ def _collect_lru_region(self, size):
326327

327328
num_found += 1
328329
del(lru_list[lru_list.index(lru_region)])
330+
lru_region.increment_client_count(-1)
329331
self._memory_size -= lru_region.size()
330332
self._handle_count -= 1
331333
# END while there is more memory to free
@@ -449,7 +451,7 @@ def force_map_handle_removal_win(self, base_path):
449451
for path, rlist in self._fdict.items():
450452
if path.startswith(base_path):
451453
for region in rlist:
452-
region._mf.close()
454+
region.release()
453455
num_closed += 1
454456
# END path matches
455457
# END for each path

smmap/test/test_buf.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
from time import time
1313
import sys
1414
import os
15-
import logging
1615

1716

1817
man_optimal = SlidingWindowMapManager()
@@ -104,6 +103,7 @@ def test_basics(self):
104103
assert len(d) == ofs_end - ofs_start
105104
assert d == data[ofs_start:ofs_end]
106105
num_bytes += len(d)
106+
del d
107107
else:
108108
pos = randint(0, fsize)
109109
assert buf[pos] == data[pos]
@@ -122,6 +122,7 @@ def test_basics(self):
122122
% (man_id, max_num_accesses, mode_str, type(item), num_bytes / mb, elapsed, (num_bytes / mb) / elapsed),
123123
file=sys.stderr)
124124
# END handle access mode
125+
del buf
125126
# END for each manager
126127
# END for each input
127128
os.close(fd)

smmap/test/test_mman.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -119,29 +119,29 @@ def test_memman_operation(self):
119119
# window size is 0 for static managers, hence size will be 0. We take that into consideration
120120
size = man.window_size() // 2
121121
assert c.use_region(base_offset, size).is_valid()
122-
rr = c.region_ref()
123-
assert rr().client_count() == 2 # the manager and the cursor and us
122+
rr = c.region()
123+
assert rr.client_count() == 2 # the manager and the cursor and us
124124

125125
assert man.num_open_files() == 1
126126
assert man.num_file_handles() == 1
127-
assert man.mapped_memory_size() == rr().size()
127+
assert man.mapped_memory_size() == rr.size()
128128

129129
# assert c.size() == size # the cursor may overallocate in its static version
130130
assert c.ofs_begin() == base_offset
131-
assert rr().ofs_begin() == 0 # it was aligned and expanded
131+
assert rr.ofs_begin() == 0 # it was aligned and expanded
132132
if man.window_size():
133133
# but isn't larger than the max window (aligned)
134-
assert rr().size() == align_to_mmap(man.window_size(), True)
134+
assert rr.size() == align_to_mmap(man.window_size(), True)
135135
else:
136-
assert rr().size() == fc.size
136+
assert rr.size() == fc.size
137137
# END ignore static managers which dont use windows and are aligned to file boundaries
138138

139139
assert c.buffer()[:] == data[base_offset:base_offset + (size or c.size())]
140140

141141
# obtain second window, which spans the first part of the file - it is a still the same window
142142
nsize = (size or fc.size) - 10
143143
assert c.use_region(0, nsize).is_valid()
144-
assert c.region_ref()() == rr()
144+
assert c.region() == rr
145145
assert man.num_file_handles() == 1
146146
assert c.size() == nsize
147147
assert c.ofs_begin() == 0
@@ -154,15 +154,15 @@ def test_memman_operation(self):
154154
if man.window_size():
155155
assert man.num_file_handles() == 2
156156
assert c.size() < size
157-
assert c.region_ref()() is not rr() # old region is still available, but has not curser ref anymore
158-
assert rr().client_count() == 1 # only held by manager
157+
assert c.region() is not rr # old region is still available, but has not curser ref anymore
158+
assert rr.client_count() == 1 # only held by manager
159159
else:
160160
assert c.size() < fc.size
161161
# END ignore static managers which only have one handle per file
162-
rr = c.region_ref()
163-
assert rr().client_count() == 2 # manager + cursor
164-
assert rr().ofs_begin() < c.ofs_begin() # it should have extended itself to the left
165-
assert rr().ofs_end() <= fc.size # it cannot be larger than the file
162+
rr = c.region()
163+
assert rr.client_count() == 2 # manager + cursor
164+
assert rr.ofs_begin() < c.ofs_begin() # it should have extended itself to the left
165+
assert rr.ofs_end() <= fc.size # it cannot be larger than the file
166166
assert c.buffer()[:] == data[base_offset:base_offset + (size or c.size())]
167167

168168
# unising a region makes the cursor invalid

smmap/test/test_util.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,12 +88,7 @@ def test_region(self):
8888
# auto-refcount
8989
assert rfull.client_count() == 1
9090
rfull2 = rfull
91-
assert rfull.client_count() == 2
92-
93-
# usage
94-
assert rfull.usage_count() == 0
95-
rfull.increment_usage_count()
96-
assert rfull.usage_count() == 1
91+
assert rfull.client_count() == 1, "no auto-counting"
9792

9893
# window constructor
9994
w = MapWindow.from_region(rfull)
@@ -106,8 +101,6 @@ def test_region_list(self):
106101
for item in (fc.path, fd):
107102
ml = MapRegionList(item)
108103

109-
assert ml.client_count() == 1
110-
111104
assert len(ml) == 0
112105
assert ml.path_or_fd() == item
113106
assert ml.file_size() == fc.size

smmap/util.py

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,8 @@ def __init__(self, path_or_fd, ofs, size, flags=0):
177177
os.close(fd)
178178
# END only close it if we opened it
179179
# END close file handle
180+
# We assume the first one to use us keeps us around
181+
self.increment_client_count()
180182

181183
def _read_into_memory(self, fd, offset, size):
182184
""":return: string data as read from the given file descriptor, offset and size """
@@ -222,17 +224,25 @@ def includes_ofs(self, ofs):
222224

223225
def client_count(self):
224226
""":return: number of clients currently using this region"""
225-
from sys import getrefcount
226-
# -1: self on stack, -1 self in this method, -1 self in getrefcount
227-
return getrefcount(self) - 3
228-
229-
def usage_count(self):
230-
""":return: amount of usages so far"""
231227
return self._uc
232228

233-
def increment_usage_count(self):
234-
"""Adjust the usage count by the given positive or negative offset"""
235-
self._uc += 1
229+
def increment_client_count(self, ofs = 1):
230+
"""Adjust the usage count by the given positive or negative offset.
231+
If usage count equals 0, we will auto-release our resources
232+
:return: True if we released resources, False otherwise. In the latter case, we can still be used"""
233+
self._uc += ofs
234+
assert self._uc > -1, "Increments must match decrements, usage counter negative: %i" % self._uc
235+
236+
if self.client_count() == 0:
237+
self.release()
238+
return True
239+
else:
240+
return False
241+
# end handle release
242+
243+
def release(self):
244+
"""Release all resources this instance might hold. Must only be called if there usage_count() is zero"""
245+
self._mf.close()
236246

237247
# re-define all methods which need offset adjustments in compatibility mode
238248
if _need_compat_layer:
@@ -268,11 +278,6 @@ def __init__(self, path_or_fd):
268278
self._path_or_fd = path_or_fd
269279
self._file_size = None
270280

271-
def client_count(self):
272-
""":return: amount of clients which hold a reference to this instance"""
273-
from sys import getrefcount
274-
return getrefcount(self) - 3
275-
276281
def path_or_fd(self):
277282
""":return: path or file descriptor we are attached to"""
278283
return self._path_or_fd

0 commit comments

Comments
 (0)