Skip to content

Commit a41b6cc

Browse files
committed
Import Massif parser from MathieuTurcotte/msparser
Just import the minimum required for our future use from https://github.com/MathieuTurcotte/msparser at 8ce7336d9b55366. This code is MIT licensed (license file is included).
1 parent 301cd46 commit a41b6cc

File tree

2 files changed

+297
-0
lines changed

2 files changed

+297
-0
lines changed

scripts/memory-test/LICENSE

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
Copyright (C) 2011 Mathieu Turcotte
2+
3+
Permission is hereby granted, free of charge, to any person obtaining a copy of
4+
this software and associated documentation files (the "Software"), to deal in
5+
the Software without restriction, including without limitation the rights to
6+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7+
of the Software, and to permit persons to whom the Software is furnished to do
8+
so, subject to the following conditions:
9+
10+
The above copyright notice and this permission notice shall be included in all
11+
copies or substantial portions of the Software.
12+
13+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19+
SOFTWARE.

scripts/memory-test/msparser.py

Lines changed: 278 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,278 @@
1+
# Copyright (c) 2011 Mathieu Turcotte
2+
# Licensed under the MIT license.
3+
4+
"""
5+
The msparser module offers a simple interface to parse the Valgrind massif.out
6+
file format, i.e. data files produced by the Valgrind heap profiler.
7+
"""
8+
9+
from __future__ import with_statement # Enable with statement in Python 2.5.
10+
import os.path
11+
import re
12+
13+
__all__ = ["parse", "parse_file", "ParseError"]
14+
15+
# NOTE: every pattern below is written as a raw string so that regex escapes
# such as \s and \d are handed to the re module untouched instead of being
# treated as (invalid) Python string escapes.

# Precompiled regex used to recognize comment lines and blank lines.
_COMMENT_RE = re.compile(r"\s*(#|$)")

# Precompiled regexes used to parse header fields.
_FIELD_DESC_RE = re.compile(r"desc:\s(?P<data>.*)$")
_FIELD_CMD_RE = re.compile(r"cmd:\s(?P<data>.*)$")
_FIELD_TIME_UNIT_RE = re.compile(r"time_unit:\s(?P<data>ms|B|i)$")

# Precompiled regexes used to parse snapshot fields.
_FIELD_SNAPSHOT_RE = re.compile(r"snapshot=(?P<data>\d+)")
_FIELD_TIME_RE = re.compile(r"time=(?P<data>\d+)")
_FIELD_MEM_HEAP_RE = re.compile(r"mem_heap_B=(?P<data>\d+)")
_FIELD_MEM_EXTRA_RE = re.compile(r"mem_heap_extra_B=(?P<data>\d+)")
_FIELD_MEM_STACK_RE = re.compile(r"mem_stacks_B=(?P<data>\d+)")
_FIELD_HEAP_TREE_RE = re.compile(r"heap_tree=(?P<data>\w+)")

# Precompiled regex to parse heap entries. Matches three things:
#   - the number of children,
#   - the number of bytes,
#   - and the details section.
_HEAP_ENTRY_RE = re.compile(r"""
    \s*n                    # skip zero or more spaces, then 'n'
    (?P<num_children>\d+)   # match number of children, 1 or more digits
    :\s                     # skip ':' and one space
    (?P<num_bytes>\d+)      # match the number of bytes, 1 or more digits
    \s                      # skip one space
    (?P<details>.*)         # match the details
    """, re.VERBOSE)

# Precompiled regex to check if the details section is below threshold.
_HEAP_BELOW_THRESHOLD_RE = re.compile(r"""in.*places?.*""")

# Precompiled regex to parse the details section of entries above threshold.
# This should match four things:
#   - the hexadecimal address,
#   - the function name,
#   - the file name or binary path, i.e. file.cpp or usr/local/bin/foo.so,
#   - and a line number if present.
# Last two parts are optional to handle entries without a file name or binary
# path.
_HEAP_DETAILS_RE = re.compile(r"""
    (?P<address>[a-fA-F0-9x]+)  # match the hexadecimal address
    :\s                         # skip ': '
    (?P<function>.+?)           # match the function's name, non-greedy
    (?:                         # don't capture fname/line group
        \s
        \(
        (?:in\s)?               # skip 'in ' if present
        (?P<fname>[^:]+)        # match the file name
        :?                      # skip ':', if present
        (?P<line>\d+)?          # match the line number, if present
        \)
    )?                          # fname/line group is optional
    $                           # should have reached the EOL
    """, re.VERBOSE)
70+
71+
72+
class ParseContext:
    """
    A simple context for parsing. Dumbed down version of fileinput.

    Wraps a file-like object and counts the readline() calls made on it so
    that parse errors can report a position.
    """
    def __init__(self, fd):
        # fd only needs a readline() method; a 'name' attribute is optional.
        self._fd = fd
        self._line = 0

    def line(self):
        """
        Return the number of readline() calls made so far.
        """
        return self._line

    def readline(self):
        """
        Advance the line counter and return the next raw line from the
        underlying object.
        """
        self._line += 1
        return self._fd.readline()

    def filename(self):
        """
        Return the absolute path of the underlying file when it has a usable
        name. Objects without a 'name' attribute (e.g. StringIO) yield
        "<unknown>"; names that are not paths (e.g. the integer descriptor of
        an os.fdopen() object) are returned as their string form.
        """
        name = getattr(self._fd, "name", None)
        if name is None:
            return "<unknown>"
        try:
            return os.path.abspath(name)
        except (TypeError, AttributeError):
            # A non-path name must not mask the error being reported.
            return str(name)
89+
90+
91+
class ParseError(Exception):
    """
    Error raised when a parsing error is encountered.

    Attributes:
      msg      -- description of the problem,
      line     -- value of ctx.line() when the error was created,
      filename -- value of ctx.filename() when the error was created.
    """
    def __init__(self, msg, ctx):
        # Initialise the base class so that 'args' and repr() carry the
        # message like any other exception.
        Exception.__init__(self, msg)
        self.msg = msg
        self.line = ctx.line()
        self.filename = ctx.filename()

    def __str__(self):
        return " ".join([str(self.msg), 'at line', str(self.line), 'in',
                         str(self.filename)])
103+
104+
105+
def parse_file(filepath):
    """
    Open the massif output file located at filepath, parse it and return the
    resulting data. The file is closed before returning.
    """
    with open(filepath) as massif_file:
        parsed = parse(massif_file)
    return parsed
111+
112+
113+
def parse(fd):
    """
    Parse massif output from an already opened file object and return a
    dictionary holding the header fields and the snapshots.
    """
    context = ParseContext(fd)
    result = {}
    # The header always comes first, followed by the snapshots.
    _parse_header(context, result)
    _parse_snapshots(context, result)
    return result
122+
123+
124+
def _match_unconditional(ctx, regex, string):
    """
    Unconditionally match a regular expression against a string: return the
    match object on success, raise a ParseError otherwise.
    """
    found = regex.match(string)
    if found is not None:
        return found

    message = "can't match '" + string + "' against '" + regex.pattern + "'"
    raise ParseError(message, ctx)
134+
135+
136+
def _get_next_line(ctx, may_reach_eof=False):
    """
    Read another line from ctx and return it without its line terminator
    (both "\\n" and "\\r\\n" endings are removed). On EOF, None is returned if
    may_reach_eof is true; otherwise a ParseError is raised.
    """
    line = ctx.readline()  # Returns an empty string on EOF.

    if not line:
        if may_reach_eof:
            return None
        raise ParseError("unexpected EOF", ctx)

    # Strip only the trailing terminator. A leftover '\r' from CRLF input
    # would defeat the '$'-anchored field regexes.
    return line.rstrip("\r\n")
150+
151+
152+
def _get_next_field(ctx, field_regex, may_reach_eof=False):
    """
    Return the data of the next field found in ctx. Comment and blank lines
    are skipped; the first other line must match field_regex, whose 'data'
    group is returned. None is returned when EOF is reached and may_reach_eof
    is True; if may_reach_eof is False, reaching EOF is an error.
    """
    while True:
        current = _get_next_line(ctx, may_reach_eof)
        if current is None:
            return None
        if _COMMENT_RE.match(current):
            continue
        return _match_unconditional(ctx, field_regex, current).group("data")
168+
169+
170+
def _parse_header(ctx, mdata):
    """
    Read the three header fields, in file order, and store them in mdata
    under the "desc", "cmd" and "time_unit" keys.
    """
    header_fields = (
        ("desc", _FIELD_DESC_RE),
        ("cmd", _FIELD_CMD_RE),
        ("time_unit", _FIELD_TIME_UNIT_RE),
    )
    for key, field_regex in header_fields:
        mdata[key] = _get_next_field(ctx, field_regex)
174+
175+
176+
def _parse_snapshots(ctx, mdata):
    """
    Parse every remaining snapshot from ctx and store in mdata:
      - "snapshots": the list of snapshot data, in file order,
      - "detailed_snapshot_indices": positions of the detailed snapshots,
      - "peak_snapshot_index": position of the peak snapshot, only set when
        a peak snapshot was found.
    """
    parsed = []
    detailed = []
    peak = None

    while True:
        entry = _parse_snapshot(ctx)
        if entry is None:
            break

        # Position this snapshot will occupy once appended.
        position = len(parsed)
        if entry["is_detailed"]:
            detailed.append(position)
        if entry["is_peak"]:
            peak = position
        parsed.append(entry["data"])

    mdata["snapshots"] = parsed
    mdata["detailed_snapshot_indices"] = detailed

    if peak is not None:
        mdata["peak_snapshot_index"] = peak
198+
199+
200+
def _parse_snapshot(ctx):
    """
    Parse the next snapshot from ctx. Returns None on EOF, otherwise a
    dictionary with the "is_detailed" and "is_peak" flags and the snapshot
    itself under the "data" key.
    """
    raw_id = _get_next_field(ctx, _FIELD_SNAPSHOT_RE, may_reach_eof=True)
    if raw_id is None:
        return None

    # Fields are read in the order they appear in the file.
    data = {"id": int(raw_id)}
    data["time"] = int(_get_next_field(ctx, _FIELD_TIME_RE))
    data["mem_heap"] = int(_get_next_field(ctx, _FIELD_MEM_HEAP_RE))
    data["mem_heap_extra"] = int(_get_next_field(ctx, _FIELD_MEM_EXTRA_RE))
    data["mem_stack"] = int(_get_next_field(ctx, _FIELD_MEM_STACK_RE))
    tree_kind = _get_next_field(ctx, _FIELD_HEAP_TREE_RE)

    # Any value other than "empty" is followed by a heap tree.
    has_tree = tree_kind != "empty"
    if has_tree:
        data["heap_tree"] = _parse_heap_tree(ctx)
    else:
        data["heap_tree"] = None

    return {
        "is_detailed": has_tree,
        "is_peak": tree_kind == "peak",
        "data": data
    }
239+
240+
241+
def _parse_heap_tree(ctx):
    """
    Parse a heap tree node and, recursively, all of its children.

    Returns a dictionary with three keys:
      - "nbytes": the number of bytes of the entry,
      - "children": the list of child nodes, parsed the same way,
      - "details": None when the details section doesn't match
        _HEAP_DETAILS_RE, otherwise a dictionary with the "address",
        "function", "file" and "line" keys.
    A ParseError is raised on EOF or if the line isn't a heap entry.
    """
    line = _get_next_line(ctx)

    entry_match = _match_unconditional(ctx, _HEAP_ENTRY_RE, line)
    details_match = _HEAP_DETAILS_RE.match(entry_match.group("details"))

    details = None
    if details_match:
        # The 'line' field could be None if the binary/library wasn't compiled
        # with debug info. To avoid errors on this condition, we need to make
        # sure that the 'line' field is not None before trying to convert it to
        # an integer.
        linum = details_match.group("line")
        if linum is not None:
            linum = int(linum)

        details = {
            "address": details_match.group("address"),
            "function": details_match.group("function"),
            "file": details_match.group("fname"),
            "line": linum
        }

    # Children follow their parent, one subtree after the other.
    num_children = int(entry_match.group("num_children"))
    children = [_parse_heap_tree(ctx) for _ in range(num_children)]

    return {
        "nbytes": int(entry_match.group("num_bytes")),
        "children": children,
        "details": details
    }

0 commit comments

Comments
 (0)