Skip to content

Commit 8489c31

Browse files
committed
refact(buf): simplify API - no begin/end after construct
+ feat(mman): report missed exits.
1 parent 33f12e6 commit 8489c31

File tree

6 files changed

+140
-142
lines changed

6 files changed

+140
-142
lines changed

doc/source/changes.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ Changelog
1515

1616
Get them from ``smmap.managed_mmaps()``.
1717

18-
- Retrofit :class:`SlidingWindowMapBuffer` also as context-manager.
18+
- Simplify :class:`SlidingWindowMapBuffer` as create/close context-manager
19+
(no ``begin_access()``, or ``end_access()``).
20+
1921

2022
v0.9.0
2123
========

doc/source/tutorial.rst

+55-54
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@ This text briefly introduces you to the basic design decisions and accompanying
88
Design
99
======
1010
Per application, there must be a *MemoryManager* to be used throughout the application.
11-
It can be configured to keep your resources within certain limits.
11+
It can be configured to keep your resources within certain limits (see :func:`smmap.managed_mmaps()`).
1212

13-
To access mapped regions, you require a cursor. Cursors point to exactly one file and serve as handles into it.
13+
To access mapped regions, you require a cursor. Cursors point to exactly one file
14+
and serve as handles into it.
1415
As long as it exists, the respective memory region will remain available.
1516

16-
For convenience, a buffer implementation is provided which handles cursors and resource allocation
17-
behind its simple buffer like interface.
17+
For convenience, a buffer implementation is provided (:class:`smmap.SlidingWindowMapBuffer`)
18+
which handles cursors and resource allocation behind its simple buffer like interface.
1819

1920

2021
Memory Managers
@@ -56,50 +57,48 @@ Cursors
5657
========
5758
*Cursors* are handles that point onto a window, i.e. a region of a file mapped into memory. From them you may obtain a buffer through which the data of that window can actually be accessed::
5859

59-
import smmap.test.lib
60-
61-
with smmap.managed_mmaps() as mman:
62-
fc = smmap.test.lib.FileCreator(1024*1024*8, "test_file")
60+
import smmap.test.lib as tlib
6361

62+
with smmap.managed_mmaps() as mman, tlib.FileCreator(1024*1024*8, "test_file") as fc:
6463
# obtain a cursor to access some file.
65-
c = mman.make_cursor(fc.path)
66-
67-
# the cursor is now associated with the file, but not yet usable
68-
assert c.is_associated()
69-
assert not c.is_valid()
70-
71-
# before you can use the cursor, you have to specify a window you want to
72-
# access. The following just says you want as much data as possible starting
73-
# from offset 0.
74-
# To be sure your region could be mapped, query for validity
75-
assert c.use_region().is_valid() # use_region returns self
76-
77-
# once a region was mapped, you must query its dimension regularly
78-
# to assure you don't try to access its buffer out of its bounds
79-
assert c.size()
80-
c.buffer()[0] # first byte
81-
c.buffer()[1:10] # first 9 bytes
82-
c.buffer()[c.size()-1] # last byte
83-
84-
# its recommended not to create big slices when feeding the buffer
85-
# into consumers (e.g. struct or zlib).
86-
# Instead, either give the buffer directly, or use pythons buffer command.
87-
buffer(c.buffer(), 1, 9) # first 9 bytes without copying them
88-
89-
# you can query absolute offsets, and check whether an offset is included
90-
# in the cursor's data.
91-
assert c.ofs_begin() < c.ofs_end()
92-
assert c.includes_ofs(100)
93-
94-
# If you are over out of bounds with one of your region requests, the
95-
# cursor will be come invalid. It cannot be used in that state
96-
assert not c.use_region(fc.size, 100).is_valid()
97-
# map as much as possible after skipping the first 100 bytes
98-
assert c.use_region(100).is_valid()
99-
100-
# You must explicitly free cursor resources by unusing the cursor's region
101-
c.unuse_region()
102-
assert not c.is_valid()
64+
with mman.make_cursor(fc.path) as c:
65+
66+
# the cursor is now associated with the file, but not yet usable
67+
assert c.is_associated()
68+
assert not c.is_valid()
69+
70+
# before you can use the cursor, you have to specify a window you want to
71+
# access. The following just says you want as much data as possible starting
72+
# from offset 0.
73+
# To be sure your region could be mapped, query for validity
74+
assert c.use_region().is_valid() # use_region returns self
75+
76+
# once a region was mapped, you must query its dimension regularly
77+
# to assure you don't try to access its buffer out of its bounds
78+
assert c.size()
79+
c.buffer()[0] # first byte
80+
c.buffer()[1:10] # first 9 bytes
81+
c.buffer()[c.size()-1] # last byte
82+
83+
# it's recommended not to create big slices when feeding the buffer
84+
# into consumers (e.g. struct or zlib).
85+
# Instead, either give the buffer directly, or use Python's buffer command.
86+
buffer(c.buffer(), 1, 9) # first 9 bytes without copying them
87+
88+
# you can query absolute offsets, and check whether an offset is included
89+
# in the cursor's data.
90+
assert c.ofs_begin() < c.ofs_end()
91+
assert c.includes_ofs(100)
92+
93+
# If you are out of bounds with one of your region requests, the
94+
# cursor will become invalid. It cannot be used in that state
95+
assert not c.use_region(fc.size, 100).is_valid()
96+
# map as much as possible after skipping the first 100 bytes
97+
assert c.use_region(100).is_valid()
98+
99+
# You must explicitly free cursor resources by unusing the cursor's region
100+
c.unuse_region()
101+
assert not c.is_valid()
103102

104103

105104
Now you would have to write your algorithms around this interface to properly slide through huge amounts of data.
@@ -116,21 +115,23 @@ which uses a cursor underneath.
116115
With it, you can access all data in a possibly huge file
117116
without having to take care of setting the cursor to different regions yourself::
118117

119-
# Create a default buffer which can operate on the whole file
120-
with smmap.SlidingWindowMapBuffer(mman.make_cursor(fc.path)) as buf:
121-
118+
## Create a default buffer which can operate on the whole file
119+
cur = mman.make_cursor(fc.path)
120+
with smmap.SlidingWindowMapBuffer(cur) as buf:
122121
# you can use it right away
123122
assert buf.cursor().is_valid()
124123

125124
buf[0] # access the first byte
126125
buf[-1] # access the last byte of the file
127126
buf[-10:]# access the last ten bytes
128127

129-
# If you want to keep the instance between different accesses, use the
130-
# dedicated methods
131-
buf.end_access()
132-
assert not buf.cursor().is_valid() # you cannot use the buffer anymore
133-
assert buf.begin_access(offset=10) # start using the buffer at an offset
128+
## You cannot use the buffer anymore.
129+
assert not buf.cursor().is_valid()
130+
131+
## To access the file again (e.g. starting at a different offset),
132+
# use another instance.
133+
with smmap.SlidingWindowMapBuffer(cur, offset=10) as buf:
134+
assert buf.cursor().is_valid()
134135

135136

136137
Disadvantages

smmap/buf.py

+46-42
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""Module with a simple buffer implementation using the memory manager"""
22
import sys
3+
import logging
34

45
__all__ = ["SlidingWindowMapBuffer"]
56

@@ -10,6 +11,9 @@
1011
bytes = str # @ReservedAssignment
1112

1213

14+
log = logging.getLogger(__name__)
15+
16+
1317
class SlidingWindowMapBuffer(object):
1418

1519
"""A buffer like object which allows direct byte-wise object and slicing into
@@ -29,11 +33,12 @@ class SlidingWindowMapBuffer(object):
2933
__slots__ = (
3034
'_c', # our cursor
3135
'_size', # our supposed size
36+
'_entered', # entry/exit accounting
3237
)
3338

3439
def __init__(self, cursor=None, offset=0, size=sys.maxsize, flags=0):
35-
"""Initalize the instance to operate on the given cursor.
36-
:param cursor: if not None, the associated cursor to the file you want to access
40+
"""Initialize the instance to operate on the given cursor.
41+
:param cursor: The associated cursor to the file you want to access
3742
It must not be None, otherwise a ValueError is raised.
3843
:param offset: absolute offset in bytes
3944
:param size: the total size of the mapping. Defaults to the maximum possible size
@@ -43,24 +48,49 @@ def __init__(self, cursor=None, offset=0, size=sys.maxsize, flags=0):
4348
Hence it is in your own interest to provide a proper size !
4449
:param flags: Additional flags to be passed to os.open
4550
:raise ValueError: if the buffer could not achieve a valid state"""
51+
if not cursor:
52+
raise ValueError("Cursor cannot be null!")
4653
self._c = cursor
47-
if cursor and not self.begin_access(cursor, offset, size, flags):
48-
raise ValueError("Failed to allocate the buffer - probably the given offset is out of bounds")
49-
# END handle offset
50-
51-
def __del__(self):
52-
self.end_access()
54+
self._entered = 0
55+
56+
if cursor.is_associated() and cursor.use_region(offset, size, flags).is_valid():
57+
# if given size is too large or default, we compute a proper size
58+
# If its smaller, we assume the combination between offset and size
59+
# as chosen by the user is correct and use it !
60+
# If not, the user is in trouble.
61+
if size > cursor.file_size():
62+
size = cursor.file_size() - offset
63+
# END handle size
64+
self._size = size
65+
else:
66+
raise ValueError("Cursor %s not associated or mapping region failed!" % cursor)
5367

5468
def __enter__(self):
69+
assert self._entered >= 0, self._entered
70+
self._entered += 1
5571
return self
5672

5773
def __exit__(self, exc_type, exc_value, traceback):
58-
self.end_access()
74+
assert self._entered >= 0, self._entered
75+
self._entered -= 1
76+
if self._entered == 0:
77+
self.close()
78+
79+
def __del__(self):
80+
if self._entered != 0:
81+
log.warning("Missed %s exit(s) on %s!" % (self._entered, self))
82+
self.close()
83+
84+
def _check_if_entered(self):
85+
if self._entered <= 0:
86+
raise ValueError('Context-manager %s not entered!' % self)
5987

6088
def __len__(self):
6189
return self._size
6290

6391
def __getitem__(self, i):
92+
self._check_if_entered()
93+
6494
if isinstance(i, slice):
6595
return self.__getslice__(i.start or 0, i.stop or self._size)
6696
c = self._c
@@ -73,6 +103,8 @@ def __getitem__(self, i):
73103
return c.buffer()[i - c.ofs_begin()]
74104

75105
def __getslice__(self, i, j):
106+
self._check_if_entered()
107+
76108
c = self._c
77109
# fast path, slice fully included - saves a concatenation operation and
78110
# should be the default
@@ -124,44 +156,16 @@ def __getslice__(self, i, j):
124156
# END fast or slow path
125157
#{ Interface
126158

127-
def begin_access(self, cursor=None, offset=0, size=sys.maxsize, flags=0):
128-
"""Call this before the first use of this instance. The method was already
129-
called by the constructor in case sufficient information was provided.
130-
131-
For more information no the parameters, see the __init__ method
132-
:param path: if cursor is None the existing one will be used.
133-
:return: True if the buffer can be used"""
134-
if cursor:
135-
self._c = cursor
136-
# END update our cursor
137-
138-
# reuse existing cursors if possible
139-
if self._c is not None and self._c.is_associated():
140-
res = self._c.use_region(offset, size, flags).is_valid()
141-
if res:
142-
# if given size is too large or default, we computer a proper size
143-
# If its smaller, we assume the combination between offset and size
144-
# as chosen by the user is correct and use it !
145-
# If not, the user is in trouble.
146-
if size > self._c.file_size():
147-
size = self._c.file_size() - offset
148-
# END handle size
149-
self._size = size
150-
# END set size
151-
return res
152-
# END use our cursor
153-
return False
154-
155-
def end_access(self):
159+
def close(self):
156160
"""Call this method once you are done using the instance. It is automatically
157161
called on destruction, and should be called just in time to allow system
158162
resources to be freed.
159163
160-
Once you called end_access, you must call begin access before reusing this instance!"""
161-
self._size = 0
162-
if self._c is not None:
164+
Once you called close, this instance cannot be reused; create a new instance instead!"""
165+
if self._c:
163166
self._c.unuse_region()
164-
# END unuse region
167+
self._c = None
168+
self._size = 0
165169

166170
def cursor(self):
167171
""":return: the currently set cursor which provides access to the data"""

smmap/mman.py

+5
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,11 @@ def __exit__(self, exc_type, exc_value, traceback):
359359
if leaft_overs:
360360
log.debug("Cleaned up %s left-over mmap-regions." % leaft_overs)
361361

362+
def __del__(self):
363+
if self._entered != 0:
364+
log.warning("Missed %s exit(s) on %s!" % (self._entered, self))
365+
self.close()
366+
362367
def close(self):
363368
self.collect()
364369

0 commit comments

Comments
 (0)