Skip to content

Commit b9add7f

Browse files
moved comments from source file to header
1 parent a92ba80 commit b9add7f

File tree

2 files changed

+93
-95
lines changed

2 files changed

+93
-95
lines changed

vpr/src/place/place_util.cpp

Lines changed: 5 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,14 @@
99
#include "draw_global.h"
1010
#include "place_constraints.h"
1111

12-
/* File-scope routines */
13-
static GridBlock init_grid_blocks();
14-
1512
/**
16-
* @brief Initialize the placer's block-grid dual direction mapping.
17-
*
18-
* Forward direction - block to grid: place_ctx.block_locs.
19-
* Reverse direction - grid to block: place_ctx.grid_blocks.
13+
* @brief Initialize `grid_blocks`, the inverse structure of `block_locs`.
2014
*
21-
* Initialize both of them to empty states.
15+
* The container at each grid block location should have a length equal to the
16+
* subtile capacity of that block. Unused subtiles are marked EMPTY_BLOCK_ID.
2217
*/
18+
static GridBlock init_grid_blocks();
19+
2320
void init_placement_context() {
2421
auto& place_ctx = g_vpr_ctx.mutable_placement();
2522
auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -32,12 +29,6 @@ void init_placement_context() {
3229
place_ctx.grid_blocks = init_grid_blocks();
3330
}
3431

35-
/**
36-
* @brief Initialize `grid_blocks`, the inverse structure of `block_locs`.
37-
*
38-
* The container at each grid block location should have a length equal to the
39-
* subtile capacity of that block. Unused subtile would be marked EMPTY_BLOCK_ID.
40-
*/
4132
static GridBlock init_grid_blocks() {
4233
auto& device_ctx = g_vpr_ctx.device();
4334
int num_layers = device_ctx.grid.get_num_layers();
@@ -56,12 +47,6 @@ static GridBlock init_grid_blocks() {
5647
return grid_blocks;
5748
}
5849

59-
/**
60-
* @brief Mutator: updates the norm factors in the outer loop iteration.
61-
*
62-
* At each temperature change we update these values to be used
63-
* for normalizing the trade-off between timing and wirelength (bb)
64-
*/
6550
void t_placer_costs::update_norm_factors() {
6651
if (place_algorithm.is_timing_driven()) {
6752
bb_cost_norm = 1 / bb_cost;
@@ -73,11 +58,6 @@ void t_placer_costs::update_norm_factors() {
7358
}
7459
}
7560

76-
/**
77-
* @brief Accumulates NoC cost difference terms
78-
*
79-
* @param noc_delta_cost NoC cost difference if the swap is accepted
80-
*/
8161
t_placer_costs& t_placer_costs::operator+=(const NocCostTerms& noc_delta_cost) {
8262
noc_cost_terms += noc_delta_cost;
8363

@@ -116,20 +96,6 @@ t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched,
11696
UPPER_RLIM = std::max(grid.width() - 1, grid.height() - 1);
11797
}
11898

119-
/**
120-
* @brief Get the initial limit for inner loop block move attempt limit.
121-
*
122-
* There are two ways to scale the move limit.
123-
* e_place_effort_scaling::CIRCUIT
124-
* scales the move limit proportional to num_blocks ^ (4/3)
125-
* e_place_effort_scaling::DEVICE_CIRCUIT
126-
* scales the move limit proportional to device_size ^ (2/3) * num_blocks ^ (2/3)
127-
*
128-
* The second method is almost identical to the first one when the device
129-
* is highly utilized (device_size ~ num_blocks). For low utilization devices
130-
* (device_size >> num_blocks), the search space is larger, so the second method
131-
* performs more moves to ensure better optimization.
132-
*/
13399
int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched) {
134100
const auto& device_ctx = g_vpr_ctx.device();
135101
const auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -153,16 +119,6 @@ int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sch
153119
return move_lim;
154120
}
155121

156-
/**
157-
* @brief Update the annealing state according to the annealing schedule selected.
158-
*
159-
* USER_SCHED: A manual fixed schedule with fixed alpha and exit criteria.
160-
* AUTO_SCHED: A more sophisticated schedule where alpha varies based on success ratio.
161-
* DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio.
162-
* See doc/src/vpr/dusty_sa.rst for more details.
163-
*
164-
* @return True->continues the annealing. False->exits the annealing.
165-
*/
166122
bool t_annealing_state::outer_loop_update(float success_rate,
167123
const t_placer_costs& costs,
168124
const t_placer_opts& placer_opts,
@@ -248,33 +204,12 @@ bool t_annealing_state::outer_loop_update(float success_rate,
248204
return true;
249205
}
250206

251-
/**
252-
* @brief Update the range limiter to keep acceptance prob. near 0.44.
253-
*
254-
* Use a floating point rlim to allow gradual transitions at low temps.
255-
* The range is bounded by 1 (FINAL_RLIM) and the grid size (UPPER_RLIM).
256-
*/
257207
void t_annealing_state::update_rlim(float success_rate) {
258208
rlim *= (1. - 0.44 + success_rate);
259209
rlim = std::min(rlim, UPPER_RLIM);
260210
rlim = std::max(rlim, FINAL_RLIM);
261211
}
262212

263-
/**
264-
* @brief Update the criticality exponent.
265-
*
266-
* When rlim shrinks towards the FINAL_RLIM value (indicating
267-
* that we are fine-tuning a more optimized placement), we can
268-
* focus more on a smaller number of critical connections.
269-
* To achieve this, we make the crit_exponent sharper, so that
270-
* critical connections would become more critical than before.
271-
*
272-
* We calculate how close rlim is to its final value comparing
273-
* to its initial value. Then, we apply the same scaling factor
274-
* on the crit_exponent so that it lands on the suitable value
275-
* between td_place_exp_first and td_place_exp_last. The scaling
276-
* factor is calculated and applied linearly.
277-
*/
278213
void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) {
279214
/* If rlim == FINAL_RLIM, then scale == 0. */
280215
float scale = 1 - (rlim - FINAL_RLIM) * INVERSE_DELTA_RLIM;
@@ -284,11 +219,6 @@ void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) {
284219
+ placer_opts.td_place_exp_first;
285220
}
286221

287-
/**
288-
* @brief Update the move limit based on the success rate.
289-
*
290-
* The value is bounded between 1 and move_lim_max.
291-
*/
292222
void t_annealing_state::update_move_lim(float success_target, float success_rate) {
293223
move_lim = move_lim_max * (success_target / success_rate);
294224
move_lim = std::min(move_lim, move_lim_max);
@@ -330,13 +260,6 @@ void t_placer_statistics::calc_iteration_stats(const t_placer_costs& costs, int
330260
std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
331261
}
332262

333-
/**
334-
* @brief Returns the standard deviation of data set x.
335-
*
336-
* There are n sample points, sum_x_squared is the summation over n of x^2 and av_x
337-
* is the average x. All operations are done in double precision, since round off
338-
* error can be a problem in the initial temp. std_dev calculation for big circuits.
339-
*/
340263
double get_std_dev(int n, double sum_x_squared, double av_x) {
341264
double std_dev;
342265
if (n <= 1) {
@@ -396,15 +319,6 @@ void zero_initialize_grid_blocks() {
396319
}
397320
}
398321

399-
/**
400-
* @brief Builds (alloc and load) legal_pos that holds all the legal locations for placement
401-
*
402-
* @param legal_pos
403-
* a lookup of all subtiles by sub_tile type
404-
* legal_pos[0..device_ctx.num_block_types-1][0..num_sub_tiles - 1] = std::vector<t_pl_loc> of all the legal locations
405-
* of the proper tile type and sub_tile type
406-
*
407-
*/
408322
void alloc_and_load_legal_placement_locations(std::vector<std::vector<std::vector<t_pl_loc>>>& legal_pos) {
409323
auto& device_ctx = g_vpr_ctx.device();
410324
auto& place_ctx = g_vpr_ctx.placement();

vpr/src/place/place_util.h

Lines changed: 88 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,19 @@ class t_placer_costs {
9797
t_placer_costs() = default;
9898

9999
public: //Mutator
100+
/**
101+
* @brief Mutator: updates the norm factors in the outer loop iteration.
102+
*
103+
* At each temperature change we update these values to be used
104+
* for normalizing the trade-off between timing and wirelength (bb)
105+
*/
100106
void update_norm_factors();
107+
108+
/**
109+
* @brief Accumulates NoC cost difference terms
110+
*
111+
* @param noc_delta_cost Cost difference for NoC-related cost terms
112+
*/
101113
t_placer_costs& operator+=(const NocCostTerms& noc_delta_cost);
102114

103115
private:
@@ -180,14 +192,52 @@ class t_annealing_state {
180192
int num_layers);
181193

182194
public: //Mutator
195+
/**
196+
* @brief Update the annealing state according to the annealing schedule selected.
197+
*
198+
* USER_SCHED: A manual fixed schedule with fixed alpha and exit criteria.
199+
* AUTO_SCHED: A more sophisticated schedule where alpha varies based on success ratio.
200+
* DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio.
201+
* See doc/src/vpr/dusty_sa.rst for more details.
202+
*
203+
* @return True to continue the annealing; false to exit the annealing.
204+
*/
183205
bool outer_loop_update(float success_rate,
184206
const t_placer_costs& costs,
185207
const t_placer_opts& placer_opts,
186208
const t_annealing_sched& annealing_sched);
187209

188210
private: //Mutator
211+
/**
212+
* @brief Update the range limiter to keep acceptance prob. near 0.44.
213+
*
214+
* Use a floating point rlim to allow gradual transitions at low temps.
215+
* The range is bounded by 1 (FINAL_RLIM) and the grid size (UPPER_RLIM).
216+
*/
189217
inline void update_rlim(float success_rate);
218+
219+
/**
220+
* @brief Update the criticality exponent.
221+
*
222+
* When rlim shrinks towards the FINAL_RLIM value (indicating
223+
* that we are fine-tuning a more optimized placement), we can
224+
* focus more on a smaller number of critical connections.
225+
* To achieve this, we make the crit_exponent sharper, so that
226+
* critical connections would become more critical than before.
227+
*
228+
* We calculate how close rlim is to its final value compared
229+
* to its initial value. Then, we apply the same scaling factor
230+
* on the crit_exponent so that it lands on the suitable value
231+
* between td_place_exp_first and td_place_exp_last. The scaling
232+
* factor is calculated and applied linearly.
233+
*/
190234
inline void update_crit_exponent(const t_placer_opts& placer_opts);
235+
236+
/**
237+
* @brief Update the move limit based on the success rate.
238+
*
239+
* The value is bounded between 1 and move_lim_max.
240+
*/
191241
inline void update_move_lim(float success_target, float success_rate);
192242
};
193243

@@ -245,13 +295,39 @@ class t_placer_statistics {
245295
void single_swap_update(const t_placer_costs& costs);
246296
};
247297

248-
///@brief Initialize the placer's block-grid dual direction mapping.
298+
/**
299+
* @brief Initialize the placer's block-grid dual direction mapping.
300+
*
301+
* Forward direction - block to grid: place_ctx.block_locs.
302+
* Reverse direction - grid to block: place_ctx.grid_blocks.
303+
*
304+
* Initialize both of them to empty states.
305+
*/
249306
void init_placement_context();
250307

251-
///@brief Get the initial limit for inner loop block move attempt limit.
308+
/**
309+
* @brief Get the initial limit for inner loop block move attempt limit.
310+
*
311+
* There are two ways to scale the move limit.
312+
* e_place_effort_scaling::CIRCUIT
313+
* scales the move limit proportional to num_blocks ^ (4/3)
314+
* e_place_effort_scaling::DEVICE_CIRCUIT
315+
* scales the move limit proportional to device_size ^ (2/3) * num_blocks ^ (2/3)
316+
*
317+
* The second method is almost identical to the first one when the device
318+
* is highly utilized (device_size ~ num_blocks). For low utilization devices
319+
* (device_size >> num_blocks), the search space is larger, so the second method
320+
* performs more moves to ensure better optimization.
321+
*/
252322
int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched);
253323

254-
///@brief Returns the standard deviation of data set x.
324+
/**
325+
* @brief Returns the standard deviation of data set x.
326+
*
327+
* There are n sample points, sum_x_squared is the summation over n of x^2 and av_x
328+
* is the average x. All operations are done in double precision, since round off
329+
* error can be a problem in the initial temp. std_dev calculation for big circuits.
330+
*/
255331
double get_std_dev(int n, double sum_x_squared, double av_x);
256332

257333
///@brief Initialize usage to 0 and blockID to EMPTY_BLOCK_ID for all place_ctx.grid_blocks locations
@@ -260,7 +336,15 @@ void zero_initialize_grid_blocks();
260336
///@brief a utility to calculate grid_blocks given the updated block_locs (used in restore_checkpoint)
261337
void load_grid_blocks_from_block_locs();
262338

263-
///@brief Builds legal_pos structure. legal_pos[type->index] is an array that gives every legal value of (x,y,z) that can accommodate a block.
339+
/**
340+
* @brief Builds (alloc and load) legal_pos that holds all the legal locations for placement
341+
*
342+
* @param legal_pos
343+
* a lookup of all subtiles by sub_tile type
344+
* legal_pos[0..device_ctx.num_block_types-1][0..num_sub_tiles - 1] = std::vector<t_pl_loc> of all the legal locations
345+
* of the proper tile type and sub_tile type
346+
*
347+
*/
264348
void alloc_and_load_legal_placement_locations(std::vector<std::vector<std::vector<t_pl_loc>>>& legal_pos);
265349

266350
///@brief Performs error checking to see if location is legal for block type, and sets the location and grid usage of the block if it is legal.

0 commit comments

Comments
 (0)