-
-
Notifications
You must be signed in to change notification settings - Fork 46.7k
/
Copy pathmemory_allocator.py
645 lines (530 loc) · 18.9 KB
/
memory_allocator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
"""Memory allocator that provides an interface similar to the C standard library:
https://en.wikipedia.org/wiki/C_dynamic_memory_allocation . It uses a bytearray to
simulate the heap, which is controlled similarly to the sbrk() syscall.
It makes a heavy use of inheritance to progressively increase complexity without
duplicating code across implementations. That lowers barrier entry and allows the
comparison of test outputs, as all variations share the same interface.
Based on https://github.com/danluu/malloc-tutorial but added features like a more
advanced realloc() or alignment. Additionally, metadata is stored outside heap. This
design choice simplifies implementation in Python, since serialization and parsing
requires more steps compared to C, where such operations can be handled with simple
struct casting.
"""
import operator
from collections.abc import Iterable, Iterator
from dataclasses import dataclass, field
from typing import Self
class Heap:
"""The heap is a chunk of the virtual address space, typically used by libc for
dynamic memory allocation. Its beginning is fixed, but its end can move, allowing it
to grow or shrink. The point where heap ends is known as the 'program break'.
https://en.wikipedia.org/wiki/Data_segment#Heap
This is a simulation implemented as a constrained wrapper around a bytearray. It can
only add or remove elements at the end.
"""
def __init__(self):
self.data = bytearray()
def print(self, show_offset=False, bytes_per_row=4):
"""Pretty hexadecimal representation.
>>> heap = Heap()
>>> heap.data = bytearray(range(7))
>>> heap.print()
00 01 02 03
04 05 06
>>> heap.print(show_offset=True)
0x00: 00 01 02 03
0x04: 04 05 06
"""
for offset in range(0, len(self.data), bytes_per_row):
if show_offset:
print(f"0x{offset:02X}: ", end="")
values = " ".join(
f"{c:02X}" for c in self.data[offset : offset + bytes_per_row]
)
print(values)
def sbrk(self, increment: int) -> int:
"""Change the location of the program break and returns the previous location.
Analogous to the C function with the same name.
If the allocator calls this method with a positive/negative number, we
will say that it has requested/returned memory to the operative system.
>>> heap = Heap()
>>> heap.data
bytearray(b'')
>>> heap.sbrk(2)
0
>>> heap.print()
2E 2E
>>> heap.sbrk(4)
2
>>> heap.print()
2E 2E 2E 2E
2E 2E
To shrink pass a negative value:
>>> heap.sbrk(-5)
6
>>> heap.print()
2E
"""
previous_program_break = len(self.data)
if increment > 0:
# Filling with something recognizable to facilitate debugging
self.data.extend(b"." * increment)
else:
del self.data[:-increment]
return previous_program_break
def __len__(self):
# This value should be minimized by a good memory allocator, as the OS uses
# unallocated memory for other processes.
return len(self.data)
def __getitem__(self, index):
# The IndexError exception raised here is analogous to the 'segmentation
# fault' signal emitted by the OS
return self.data[index]
def __setitem__(self, index, value: int):
if isinstance(index, slice):
# The ability of slice assignment to add or remove elements in the middle,
# altering the size of the bytearray, is not desired.
raise ValueError("Can not use slice assignment")
self.data[index] = value
def strcpy(self, dst: int, src: Iterable[int]):
"""Copy from the given external bytes, to the specified location in the heap.
The name is derived from the C function.
>>> heap = Heap()
>>> heap.data = bytearray(range(8))
>>> heap.print()
00 01 02 03
04 05 06 07
>>> heap.strcpy(3, [255] * 3)
>>> heap.print()
00 01 02 FF
FF FF 06 07
>>> heap.strcpy(4, range(5))
Traceback (most recent call last):
...
IndexError: bytearray index out of range
"""
for index, value in enumerate(src):
self.data[dst + index] = value
@dataclass
class MemoryAllocatorInterface:
"""These 3 methods are the most fundamental operations in
https://en.wikipedia.org/wiki/C_dynamic_memory_allocation#Overview_of_functions
All other methods added by subsequent implementations are either considered private
or helpers for testing.
"""
heap: Heap = field(default_factory=Heap)
def malloc(self, size: int) -> int:
"""Allocate memory and returns the position in the heap where allocated memory
starts.
It is the client's responsibility not to access memory out of bounds.
"""
raise NotImplementedError
def free(self, pos: int):
"""Returns the block to the allocator so it can be reused. `pos` should
be a value returned by malloc()
It is client's responsibility not to access the region after this call.
"""
raise NotImplementedError
def realloc(self, pos: int, size: int) -> int:
"""Changes the size of an allocated region, either increasing or decreasing it
Since there may not be enough space in the surrounding area to grow, it may move
and return the position of the new location.
"""
raise NotImplementedError
class MemoryAllocatorSimplest(MemoryAllocatorInterface):
"""
>>> heap = Heap()
>>> allocator = MemoryAllocatorSimplest(heap)
>>> allocator.malloc(4)
0
>>> heap.strcpy(0, b'0000')
>>> heap.print()
30 30 30 30
>>> allocator.malloc(2)
4
>>> heap.strcpy(4, b'11')
>>> heap.print()
30 30 30 30
31 31
So simple that it does not support free
>>> allocator.free(4)
Traceback (most recent call last):
...
NotImplementedError
"""
def malloc(self, size):
return self.heap.sbrk(size)
@dataclass
class MemoryAllocatorFree(MemoryAllocatorInterface):
"""Support free() by storing the size of each allocated and freed block. This
metadata will be stored in a linked list outside of the heap.
>>> allocator = MemoryAllocatorFree()
>>> _, p1, _ = [allocator.malloc_and_copy(d) for d in (b'00', b'1111', b'2222')]
>>> allocator.heap.print()
30 30 31 31
31 31 32 32
32 32
>>> p1
2
>>> allocator.free(p1)
>>> allocator.print()
pos size free data
0 2 b'00'
2 4 Y b'1111'
6 4 b'2222'
>>> allocator.malloc_and_copy(b'333')
2
>>> allocator.print()
pos size free data
0 2 b'00'
2 4 b'3331'
6 4 b'2222'
The second block was reused, instead of growing the heap.
"""
@dataclass
class Block:
size: int
pos: int
free: bool = False
next: Self | None = None
@property
def is_last(self):
return self.next is None
first: Block | None = None
def malloc(self, size):
# Try to reuse an existing free block
block = self.find_fit_block(size) or self.request_space(size)
block.free = False
return block.pos
def free_block(self, block):
assert not block.free
block.free = True
def free(self, pos):
self.free_block(self.get(pos))
def get(self, pos):
# It would be quicker if a Dict[pos,Block] were maintained.
return next(block for block in self.blocks if block.pos == pos)
def find_fit_block(self, size) -> Block | None:
try:
return next(self.find_fit_blocks(size))
except StopIteration:
return None
def find_fit_blocks(self, size) -> Iterator[Block]:
return (block for block in self.blocks if block.free and block.size >= size)
@property
def blocks(self) -> Iterator[Block]:
block = self.first
while block:
yield block
block = block.next
@property
def is_empty(self):
return self.first is None
def get_last(self):
*_, last = self.blocks
return last
def request_space(self, size) -> Block:
"""Grow the heap by requesting space to the OS."""
new_block = self.Block(size=size, pos=self.heap.sbrk(size))
self.linked_list_append(new_block)
return new_block
def linked_list_append(self, block):
if self.is_empty:
self.first = block
else:
last = self.get_last()
last.next = block
block.next = None
def print(self):
"""To facilitate debugging"""
fmt = "{pos:>3} {size:>4} {free:>4} {data}"
print(fmt.format(free="free", pos="pos", size="size", data="data"))
for block in self.blocks:
print(
fmt.format(
free="Y" if block.free else "",
pos=block.pos,
size=block.size,
data=bytes(self.heap[block.pos : block.pos + block.size]),
)
)
def malloc_and_copy(self, data: bytes) -> int:
"""Helper for tests"""
pos = self.malloc(len(data))
self.heap.strcpy(pos, data)
return pos
class MemoryAllocatorSplit(MemoryAllocatorFree):
"""When reusing a block, split if it's larger than needed.
>>> allocator = MemoryAllocatorSplit()
>>> allocator.free(allocator.malloc(4))
>>> allocator.print()
pos size free data
0 4 Y b'....'
>>> allocator.malloc(2)
0
>>> allocator.print()
pos size free data
0 2 b'..'
2 2 Y b'..'
>>> allocator.malloc(2)
2
"""
def malloc(self, size):
block = self.find_fit_block(size)
if block:
self.split(block, size)
else:
block = self.request_space(size)
block.free = False
return block.pos
def split(self, block, size):
if block.size <= size:
return None
new_block = self.Block(
size=block.size - size, free=True, pos=block.pos + size, next=block.next
)
block.size = size # shrink
block.next = new_block # insert in list
return new_block
class MemoryAllocatorMerge(MemoryAllocatorSplit):
"""When freeing a block, attempt to merge it with the next one if it's free.
>>> allocator = MemoryAllocatorMerge()
>>> # allocator = MemoryAllocatorSplit() # uncomment to compare
>>> p0 = allocator.malloc(2)
>>> p1 = allocator.malloc(2)
>>> allocator.free(p1)
>>> allocator.free(p0)
>>> allocator.print()
pos size free data
0 4 Y b'....'
>>> allocator.malloc(4)
0
"""
def free_block(self, block):
super().free_block(block)
self.optimize_after_free(block)
def optimize_after_free(self, block):
self.merge_with_next(block)
def merge_with_next(self, block):
if block.next is None or not block.next.free:
return
block.size += block.next.size
block.next = block.next.next
def split(self, block, size):
if new_block := super().split(block, size):
self.optimize_after_free(new_block)
return new_block
class MemoryAllocatorMergePrevious(MemoryAllocatorMerge):
"""Similar to the previous implementation, but also checks the previous block for
merging.
>>> allocator = MemoryAllocatorMergePrevious()
>>> # allocator = MemoryAllocatorMerge() # uncomment to compare
>>> p0 = allocator.malloc(2)
>>> p1 = allocator.malloc(2)
This time we revert the order to use the new feature:
>>> allocator.free(p0)
>>> allocator.free(p1)
>>> allocator.malloc(4)
0
"""
# Now, the list must be doubly linked.
@dataclass
class Block(MemoryAllocatorMerge.Block):
prev: Self | None = None
@property
def is_first(self):
return self.prev is None
def optimize_after_free(self, block):
super().optimize_after_free(block)
if block.prev and block.prev.free:
self.merge_with_next(block.prev)
def split(self, block, size):
if new_block := super().split(block, size):
new_block.prev = block
return new_block
def linked_list_append(self, block):
old_last = self.get_last() if not self.is_empty else None
super().linked_list_append(block)
block.prev = old_last
class MemoryAllocatorAlign(MemoryAllocatorMergePrevious):
"""The address returned by malloc() is a multiple of 4.
Data alignment improves performance at the CPU instruction level. That benefit is
not demonstrated here. More info:
https://en.wikipedia.org/wiki/Data_structure_alignment
>>> allocator_this = MemoryAllocatorAlign()
>>> allocator_prev = MemoryAllocatorMergePrevious()
>>> for data in [b'333', b'4444', b'55555', b'1']:
... _ = allocator_this.malloc_and_copy(data)
... _ = allocator_prev.malloc_and_copy(data)
>>> allocator_prev.heap.print(show_offset=True)
0x00: 33 33 33 34
0x04: 34 34 34 35
0x08: 35 35 35 35
0x0C: 31
>>> allocator_this.heap.print(show_offset=True)
0x00: 33 33 33 2E
0x04: 34 34 34 34
0x08: 35 35 35 35
0x0C: 35 2E 2E 2E
0x10: 31 2E 2E 2E
The downside is that it takes more space.
"""
@staticmethod
def align(value: int, boundary=4) -> int:
"""
>>> align = MemoryAllocatorAlign.align
>>> align(3)
4
>>> align(4)
4
>>> align(5)
8
"""
res = (value // boundary) * boundary
if res < value:
res += boundary
return res
def malloc(self, size):
return super().malloc(self.align(size))
class MemoryAllocatorRealloc(MemoryAllocatorMergePrevious):
"""
>>> allocator = MemoryAllocatorRealloc()
>>> [allocator.malloc_and_copy(d) for d in (b'AA', b'BBBB')]
[0, 2]
>>> allocator.print()
pos size free data
0 2 b'AA'
2 4 b'BBBB'
>>> allocator.realloc(0, size=3)
6
>>> allocator.print()
pos size free data
0 2 Y b'AA'
2 4 b'BBBB'
6 3 b'AA.'
"""
def realloc(self, pos, size):
return self.realloc_block(self.get(pos), size)
def realloc_block(self, block, size):
# Simplest implementation: always copy
new_pos = self.malloc(size)
self.realloc_copy(dst=new_pos, src=block.pos, size=min(size, block.size))
self.free_block(block)
return new_pos
def realloc_copy(self, dst: int, src: int, size):
for i in range(size):
self.heap[dst + i] = self.heap[src + i]
class MemoryAllocatorReallocShrink(MemoryAllocatorRealloc):
"""When a decrease is requested, avoid the potentially expensive call to
realloc_copy()
>>> allocator = MemoryAllocatorReallocShrink()
>>> # allocator = MemoryAllocatorRealloc() # uncomment to compare
>>> allocator.malloc(4)
0
>>> allocator.realloc(0, size=2)
0
>>> allocator.print()
pos size free data
0 2 b'..'
2 2 Y b'..'
"""
def realloc_block(self, block, new_size):
if new_size <= block.size:
self.split(block, new_size)
return block.pos
else:
return super().realloc_block(block, new_size)
class MemoryAllocatorReallocExtend(MemoryAllocatorRealloc):
"""When more space is needed and the next block is free, try extending the current
block instead of copying to a new location.
>>> allocator = MemoryAllocatorReallocExtend()
>>> allocator.malloc(2)
0
>>> allocator.free(allocator.malloc(2))
>>> allocator.print()
pos size free data
0 2 b'..'
2 2 Y b'..'
>>> allocator.realloc(0, size=4)
0
>>> allocator.print()
pos size free data
0 4 b'....'
"""
def realloc_block(self, block, new_size):
increase = new_size - block.size
if (
increase > 0
and block.next
and block.next.free
and block.next.size >= increase
):
self.merge_with_next(block)
self.split(block, new_size)
return block.pos
return super().realloc_block(block, new_size)
class MemoryAllocatorReturnsMemoryToOS(MemoryAllocatorMergePrevious):
"""
>>> allocator = MemoryAllocatorReturnsMemoryToOS()
>>> [allocator.malloc(1) for i in range(3)]
[0, 1, 2]
>>> allocator.free(1)
>>> allocator.print()
pos size free data
0 1 b'.'
1 1 Y b'.'
2 1 b'.'
>>> allocator.free(2)
>>> allocator.print()
pos size free data
0 1 b'.'
"""
def optimize_after_free(self, block):
super().optimize_after_free(block)
if block.is_last:
self.shrink_heap()
def shrink_heap(self):
last = self.get_last() if not self.is_empty else None
if last is not None and last.free:
self.linked_list_remove_last()
self.heap.sbrk(-last.size)
def linked_list_remove_last(self):
last = self.get_last()
if last.is_first:
# There was only this block,
# the list is now empty.
self.first = None
else:
new_last = last.prev
new_last.next = None
class MemoryAllocatorBestFit(MemoryAllocatorFree):
"""When multiple blocks fit the requested size, choose the smallest one instead of
the first one found.
>>> allocator = MemoryAllocatorBestFit()
>>> # allocator = MemoryAllocatorFree() # uncomment to compare
>>> p0, _, p2, _ = map(allocator.malloc_and_copy, [b"A" * 8, b"B", b"CC", b"D"])
>>> allocator.free(p0)
>>> allocator.free(p2)
>>> allocator.print()
pos size free data
0 8 Y b'AAAAAAAA'
8 1 b'B'
9 2 Y b'CC'
11 1 b'D'
>>> allocator.malloc(2)
9
>>> allocator.malloc(8)
0
If you repeat the experiment with the previous implementation you will see that it
has to grow the heap during the last malloc(), while this one doesn't. The reason is
fragmentation: both have the same total free space, but divided in smaller holes,
that can not be merged because they aren't contiguous.
https://en.wikipedia.org/wiki/Fragmentation_(computing)#External_fragmentation
"""
def find_fit_block(self, size):
"""Override first-fit implementation by best-fit."""
blocks = list(self.find_fit_blocks(size))
if not blocks:
return None
blocks.sort(key=operator.attrgetter("size"))
return blocks[0]