Skip to content

Commit 7714107

Browse files
author
MohamedElgammal
committed
Adding a script that control the tuning runs with multiple tuned parameters and efficiently parse the resulting results
1 parent 49de5fb commit 7714107

File tree

2 files changed

+228
-0
lines changed

2 files changed

+228
-0
lines changed
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
A script used to run tuning experiments with multiple parameters.
2+
3+
Steps to use:
4+
=============
5+
1) Edit the first section of the script by setting the `params_dict` dictionary to the parameters that you want to sweep and the corresponding values that you want to try. If you want the resulting spreadsheet to include only specific metrics, set the `keep_metrics_only` variable to `True` and list the metrics that you care about in `parsed_metrics`. If you want the full parsed result sheet, set `keep_metrics_only` to `False`.
6+
7+
2) run the script as follows:
8+
```
9+
python control_runs.py --generate <path_to_task_to_run>
10+
```
11+
12+
This will edit the `config.txt` file of this task, adding one `script_params_list_add` line for each combination of the input parameters.
13+
14+
3) Launch the task using `run_vtr_task.py` script
15+
4) When the run is done, run the script to parse the results as follows:
16+
```
17+
python control_runs.py --parse <path_to_task_to_parse>
18+
```
19+
20+
The script will generate three csv files and one xlsx file in the runXXX directory of the task, as follows:
21+
- `full_res.csv` that exactly matches parse_results.txt but in csv format
22+
- `avg_seed.csv` that averages the results of each circuit with one set of parameters over the different seed values
23+
- `geomean_res.csv` that geometrically averages the results of all the circuits over the same set of parameters
24+
- `summary.xlsx` that merges all the previously mentioned sheets in a single spreadsheet
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
import itertools
2+
import os
3+
import sys
4+
import csv
5+
import pandas as pd
6+
import numpy as np
7+
from scipy import stats
8+
9+
# Parameters to sweep. Each key is a flag and each value is the list of
# settings to try for it; one `script_params_list_add` line is generated for
# every combination of these values.
params_dict = {
    "--seed": [1, 2],
    "--place_algorithm": ["criticality_timing"],
    "--place_agent_epsilon": [0.3],
}

# When True, only the columns named in `parsed_metrics` (plus the circuit,
# arch and parameter columns) are kept in the averaged result sheets.
keep_metrics_only = True
parsed_metrics = ["num_io", "num_LAB"]
20+
21+
def safe_gmean(series):
    """Return the geometric mean of the non-zero, non-missing values.

    Zeros are replaced by NaN and then dropped together with any other NaN
    entries before the geometric mean is computed.

    Args:
        series: pandas Series of numeric values.

    Returns:
        The geometric mean of the remaining values, or NaN when no usable
        value is left after filtering.
    """
    valid = series.replace({0: np.nan}).dropna()
    if valid.empty:
        # Nothing to average: report "no data" explicitly instead of handing
        # an empty input to stats.gmean.
        return np.nan
    return stats.gmean(valid)
24+
25+
def generate_combinations(params_dict):
    """Build one ``script_params_list_add`` line per parameter combination.

    Args:
        params_dict: mapping of parameter name to the list of values to try.

    Returns:
        A list of newline-terminated strings, one for each element of the
        Cartesian product of the value lists. Each string has the form
        ``script_params_list_add=<name> <value> <name> <value> ...``.
    """
    names = list(params_dict)
    prefix = "script_params_list_add="

    def render(choice):
        # Pair every parameter name with the value picked for it.
        pairs = [f"{name} {setting}" for name, setting in zip(names, choice)]
        return prefix + ' '.join(pairs) + "\n"

    return [render(choice) for choice in itertools.product(*params_dict.values())]
35+
36+
def parse_results(input_path, params_dict):
    """Convert the latest run's parse_results.txt into the result sheets.

    Picks the ``runXXX`` directory with the largest number under
    ``input_path`` and rewrites its tab-separated ``parse_results.txt`` as
    ``full_res.csv``, replacing the ``script_params`` column by one column
    per key of ``params_dict``. It then generates ``avg_seed.csv``,
    ``geomean_res.csv`` and ``summary.xlsx`` in the same directory.

    Args:
        input_path: task directory containing the ``runXXX`` directories.
        params_dict: mapping of swept parameter names to their values; only
            the keys are used here.

    The process exits with status 1 (after printing a message) when no
    ``runXXX`` directory exists, when ``parse_results.txt`` is missing, or
    when it has no header row with a ``script_params`` column.
    """
    # Find the runXXX directory with the largest XXX
    run_dirs = [d for d in os.listdir(input_path) if d.startswith("run") and d[3:].isdigit()]
    if not run_dirs:
        print("No runXXX directories found in the specified input path.")
        sys.exit(1)

    largest_run_dir = max(run_dirs, key=lambda d: int(d[3:]))
    largest_run_path = os.path.join(input_path, largest_run_dir)

    # Path to parse_results.txt and full_res.csv
    parse_results_path = os.path.join(largest_run_path, "parse_results.txt")
    full_res_csv_path = os.path.join(largest_run_path, "full_res.csv")

    if not os.path.exists(parse_results_path):
        print(f"{parse_results_path} not found.")
        sys.exit(1)

    param_keys = list(params_dict.keys())

    # Read the parse_results.txt file and write to full_res.csv
    with open(parse_results_path, "r") as txt_file:
        reader = csv.reader(txt_file, delimiter='\t')

        # Validate the header before creating full_res.csv so a malformed
        # input does not leave an empty output file behind.
        headers = next(reader, None)
        if headers is None or "script_params" not in headers:
            print(f"{parse_results_path} has no script_params column.")
            sys.exit(1)
        script_params_index = headers.index("script_params")

        with open(full_res_csv_path, "w", newline='') as csv_file:
            writer = csv.writer(csv_file)

            # Create new headers with params_dict keys
            writer.writerow(headers[:script_params_index] + param_keys + headers[script_params_index + 1:])

            for row in reader:
                if not row:
                    # Blank line: csv.reader yields an empty list, which has
                    # no script_params field to expand.
                    continue
                script_params_dict = parse_script_params(row[script_params_index], params_dict)
                expanded = [script_params_dict.get(key, '') for key in param_keys]
                writer.writerow(row[:script_params_index] + expanded + row[script_params_index + 1:])

    print(f"Converted {parse_results_path} to {full_res_csv_path}")

    # Generate avg_seed.csv (seed averaging only happens if a --seed column exists)
    generate_avg_seed_csv(full_res_csv_path, largest_run_path)
    print("Generated average seed results")

    # Generate geomean_res.csv
    generate_geomean_res_csv(os.path.join(largest_run_path, "avg_seed.csv"), largest_run_path, params_dict)
    print("Generated geometric average results over all the circuits")

    generate_xlsx(largest_run_path)
    print("Generated xlsx that merges all the result csv files")
84+
85+
def generate_xlsx(largest_run_path):
    """Merge the result CSV files of a run into a single ``summary.xlsx``.

    Reads ``full_res.csv``, ``avg_seed.csv`` and ``geomean_res.csv`` from
    ``largest_run_path`` and writes each one, without the index, to its own
    sheet of ``summary.xlsx`` in the same directory.

    Args:
        largest_run_path: run directory holding the three CSV files.
    """
    # (sheet name, source file) pairs, in the order the sheets are written.
    sheets = (
        ("Full res", "full_res.csv"),
        ("Avg. seeds", "avg_seed.csv"),
        ("Summary", "geomean_res.csv"),
    )
    workbook_path = os.path.join(largest_run_path, "summary.xlsx")

    with pd.ExcelWriter(workbook_path) as writer:
        for title, filename in sheets:
            frame = pd.read_csv(os.path.join(largest_run_path, filename))
            frame.to_excel(writer, sheet_name=title, index=False)
99+
100+
def parse_script_params(script_params, params_dict):
    """Split an underscore-joined parameter string into per-key values.

    The string is tokenized on ``_``. Wherever the tokens spell out one of
    the keys of ``params_dict`` (keys may themselves contain underscores),
    the tokens that follow, up to the next position where any key starts,
    are joined back with ``_`` and stored as that key's value.

    Args:
        script_params: the underscore-separated parameter string.
        params_dict: mapping whose keys are the parameter names to look for.

    Returns:
        A dict with every key of ``params_dict``; keys that never occur in
        ``script_params`` map to the empty string. If a key occurs more
        than once, the last occurrence wins.
    """
    tokens = script_params.split('_')
    total = len(tokens)

    # Pre-split every key once so it can be compared against token slices.
    key_tokens = {name: name.split('_') for name in params_dict}
    result = dict.fromkeys(params_dict, '')

    def key_starting_at(pos):
        # First key (in dictionary order) whose tokens begin at `pos`.
        for name, pieces in key_tokens.items():
            if tokens[pos:pos + len(pieces)] == pieces:
                return name
        return None

    pos = 0
    while pos < total:
        name = key_starting_at(pos)
        if name is None:
            pos += 1
            continue

        # The value runs from just after the key to the start of the next key.
        start = pos + len(key_tokens[name])
        end = start
        while end < total and key_starting_at(end) is None:
            end += 1

        result[name] = '_'.join(tokens[start:end])
        pos = end

    return result
126+
127+
def generate_avg_seed_csv(full_res_csv_path, output_dir):
    """Average the results over seeds and write them to ``avg_seed.csv``.

    Reads ``full_res_csv_path``. When the module-level ``keep_metrics_only``
    flag is set, only the ``circuit`` and ``arch`` columns, the keys of the
    module-level ``params_dict`` and the ``parsed_metrics`` columns are
    kept. If a ``--seed`` column is present, rows are grouped by circuit,
    arch and every other swept parameter, and the numeric columns are
    averaged; otherwise the (possibly filtered) table is written unchanged.

    Args:
        full_res_csv_path: path of the CSV file to read.
        output_dir: directory in which ``avg_seed.csv`` is written.
    """
    df = pd.read_csv(full_res_csv_path)

    if keep_metrics_only:
        wanted = {'circuit', 'arch', *params_dict, *parsed_metrics}
        df = df[[col for col in df.columns if col in wanted]]

    # Check if '--seed' column is present
    if '--seed' in df.columns:
        # Group by circuit and arch plus every swept parameter other than the seed
        grouping_keys = ['circuit', 'arch'] + [
            key for key in params_dict if key in df.columns and key != '--seed'
        ]

        # dropna=False keeps rows whose parameter value is missing (an empty
        # cell is read back as NaN); the default would silently discard them.
        df_grouped = (
            df.groupby(grouping_keys, dropna=False)
            .mean(numeric_only=True)
            .reset_index()
        )

        # The averaged seed value is meaningless, so it is not reported.
        df_grouped = df_grouped.drop(columns=['--seed'], errors='ignore')
    else:
        df_grouped = df

    # Save the resulting dataframe to a CSV file
    avg_seed_csv_path = os.path.join(output_dir, "avg_seed.csv")
    df_grouped.to_csv(avg_seed_csv_path, index=False)
154+
155+
def generate_geomean_res_csv(full_res_csv_path, output_dir, params_dict):
    """Geometrically average the results per parameter set into ``geomean_res.csv``.

    Reads ``full_res_csv_path`` and groups the rows by every key of
    ``params_dict`` other than ``--seed`` that is present as a column.
    Numeric columns are reduced with ``safe_gmean``; non-numeric columns are
    reduced to an empty string. The ``circuit`` and ``arch`` columns are
    removed from the output.

    Args:
        full_res_csv_path: path of the CSV file to read.
        output_dir: directory in which ``geomean_res.csv`` is written.
        params_dict: mapping of swept parameter names to their values; only
            the keys are used here.
    """
    df = pd.read_csv(full_res_csv_path)

    param_columns = [key for key in params_dict if key != '--seed' and key in df.columns]
    non_param_columns = [col for col in df.columns if col not in param_columns]

    def summarize(column):
        # is_numeric_dtype also rejects pandas' dedicated string dtype, which
        # a plain comparison against 'object' would let through to gmean.
        return safe_gmean(column) if pd.api.types.is_numeric_dtype(column) else ''

    if param_columns:
        # dropna=False keeps rows whose parameter value is missing instead of
        # silently discarding them.
        geomean_df = (
            df.groupby(param_columns, dropna=False)
            .agg({col: summarize for col in non_param_columns})
            .reset_index()
        )
    else:
        # Nothing but the seed was swept: groupby rejects an empty key list,
        # so summarize the whole table as a single row.
        geomean_df = pd.DataFrame([{col: summarize(df[col]) for col in non_param_columns}])

    # Per-circuit identifiers are meaningless once averaged over circuits.
    geomean_df = geomean_df.drop(columns=['circuit', 'arch'], errors='ignore')

    geomean_res_csv_path = os.path.join(output_dir, "geomean_res.csv")
    geomean_df.to_csv(geomean_res_csv_path, index=False)
170+
171+
def main():
    """Command-line entry point.

    Expects ``sys.argv[1]`` to be ``--generate`` or ``--parse`` and
    ``sys.argv[2]`` to be the task directory.

    ``--generate`` appends one ``script_params_list_add`` line per
    combination in the module-level ``params_dict`` to
    ``<dir>/config/config.txt``, creating the ``config`` directory if
    needed. ``--parse`` runs ``parse_results`` on the directory.

    Exits with status 1 when fewer than two arguments are given or the
    option is not recognized.
    """
    if len(sys.argv) < 3:
        print("Usage: script.py <option> <path_to_directory>")
        sys.exit(1)

    option = sys.argv[1]
    directory_path = sys.argv[2]

    if option == "--generate":
        # Generate the combinations
        lines = generate_combinations(params_dict)

        # Define the path to the config file
        config_path = os.path.join(directory_path, "config", "config.txt")

        # Ensure the config directory exists
        os.makedirs(os.path.dirname(config_path), exist_ok=True)

        # If the existing file does not end with a newline, the first appended
        # line would be glued onto its last line; detect that case first.
        needs_newline = False
        if os.path.isfile(config_path) and os.path.getsize(config_path) > 0:
            with open(config_path, "rb") as existing:
                existing.seek(-1, os.SEEK_END)
                needs_newline = existing.read(1) != b"\n"

        # Append the lines to the config file
        with open(config_path, "a") as file:
            if needs_newline:
                file.write("\n")
            file.writelines(lines)

        print(f"Appended lines to {config_path}")

    elif option == "--parse":
        parse_results(directory_path, params_dict)

    else:
        print("Invalid option. Use --generate or --parse")
        sys.exit(1)


if __name__ == "__main__":
    main()
204+

0 commit comments

Comments
 (0)