Skip to content

Commit ed99766

Browse files
committed
Don't read log files more than once when parsing
1 parent 643a174 commit ed99766

File tree

1 file changed

+50
-37
lines changed

1 file changed

+50
-37
lines changed

vtr_flow/scripts/python_libs/vtr/parse_vtr_flow.py

Lines changed: 50 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import sys
66
from pathlib import Path
77
import glob
8-
from collections import OrderedDict
8+
from collections import OrderedDict, defaultdict
99

1010
# pylint: disable=wrong-import-position
1111
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
@@ -15,21 +15,52 @@
1515
# pylint: enable=wrong-import-position
1616

1717

18+
def parse_file_and_update_results(filename, patterns, results):
    """
    Scan the single file matching ``filename`` and record pattern matches in ``results``.

    ``filename`` is interpreted as a glob pattern. If it matches no file,
    ``results`` is left untouched. If it matches exactly one file, that file is
    read once, line by line, and every pattern in ``patterns`` is tried against
    each line.

    Arguments:
        filename: glob pattern expected to match at most one file.
        patterns: iterable of parse-pattern objects; each must provide a
            ``regex()`` method returning a compiled regular expression.
        results: mapping keyed by parse-pattern object. It is updated in place
            with the first capture group of each matching line; when several
            lines match the same pattern, the last one wins.

    Raises:
        vtr.InspectError: if the glob pattern matches more than one file.
    """

    # We interpret the parse pattern's filename as a glob pattern
    filepaths = glob.glob(filename)

    if len(filepaths) > 1:
        raise vtr.InspectError(
            "File pattern '{}' is ambiguous ({} files matched)".format(filename, len(filepaths)),
            len(filepaths),
            filepaths,
        )

    if not filepaths:
        # No matching file: keep whatever values the caller already stored
        return

    with open(filepaths[0], "r") as file:
        for line in file:
            # Ignore any leading '#' characters before matching. lstrip() is
            # used rather than indexing line[0] in a loop, which raised an
            # IndexError on a line made up solely of '#' with no newline.
            line = line.lstrip("#")

            for parse_pattern in patterns:
                match = parse_pattern.regex().match(line)
                if match and match.groups():
                    # Extract the first group value
                    results[parse_pattern] = match.groups()[0]
47+
48+
1849
def parse_vtr_flow(arg_list):
1950
"""
2051
parse vtr flow output
2152
"""
2253
parse_path = arg_list[0]
2354
parse_config_file = arg_list[1]
24-
parse_config_file = vtr.util.verify_file(parse_config_file, "parse config")
25-
2655
extra_params = arg_list[2:]
56+
57+
parse_config_file = vtr.util.verify_file(parse_config_file, "parse config")
2758
if parse_config_file is None:
2859
parse_config_file = str(paths.vtr_benchmarks_parse_path)
2960

3061
parse_patterns = vtr.load_parse_patterns(str(parse_config_file))
3162

32-
metrics = OrderedDict()
63+
results = OrderedDict()
3364

3465
extra_params_parsed = OrderedDict()
3566

@@ -38,49 +69,31 @@ def parse_vtr_flow(arg_list):
3869
extra_params_parsed[key] = value
3970
print(key, end="\t")
4071

41-
# Set defaults
4272
for parse_pattern in parse_patterns.values():
43-
metrics[parse_pattern.name()] = (
44-
parse_pattern.default_value() if parse_pattern.default_value() is not None else ""
73+
# Set defaults
74+
results[parse_pattern] = (
75+
parse_pattern.default_value() if parse_pattern.default_value() is not None else "-1"
4576
)
77+
78+
# Print header row
4679
print(parse_pattern.name(), end="\t")
4780
print("")
4881

4982
for key, value in extra_params_parsed.items():
5083
print(value, end="\t")
5184

52-
# Process each pattern
85+
# Group parse patterns by filename so that we only need to read each log file from disk once
86+
parse_patterns_by_filename = defaultdict(list)
5387
for parse_pattern in parse_patterns.values():
88+
parse_patterns_by_filename[parse_pattern.filename()].append(parse_pattern)
5489

55-
# We interpret the parse pattern's filename as a glob pattern
56-
filepaths = glob.glob(str(Path(parse_path) / parse_pattern.filename()))
57-
58-
if len(filepaths) > 1:
59-
raise vtr.InspectError(
60-
"File pattern '{}' is ambiguous ({} files matched)".format(
61-
parse_pattern.filename(), len(filepaths)
62-
),
63-
len(filepaths),
64-
filepaths,
65-
)
66-
67-
if len(filepaths) == 1:
68-
69-
assert Path(filepaths[0]).exists
70-
metrics[parse_pattern.name()] = "-1"
71-
with open(filepaths[0], "r") as file:
72-
for line in file:
73-
while line[0] == "#":
74-
line = line[1:]
75-
match = parse_pattern.regex().match(line)
76-
if match and match.groups():
77-
# Extract the first group value
78-
metrics[parse_pattern.name()] = match.groups()[0]
79-
print(metrics[parse_pattern.name()], end="\t")
80-
else:
81-
# No matching file, skip
82-
print("-1", end="\t")
83-
assert len(filepaths) == 0
90+
# Process each pattern
91+
for filename, patterns in parse_patterns_by_filename.items():
92+
parse_file_and_update_results(str(Path(parse_path) / filename), patterns, results)
93+
94+
# Print results
95+
for parse_pattern in parse_patterns.values():
96+
print(results[parse_pattern], end="\t")
8497
print("")
8598

8699
return 0

0 commit comments

Comments
 (0)