Skip to content

Commit 870eca6

Browse files
committed
Moved t_placer_costs and t_annealing_state and related routines to place_util.* files.
1 parent 92c416a commit 870eca6

File tree

3 files changed

+192
-176
lines changed

3 files changed

+192
-176
lines changed

vpr/src/place/place.cpp

Lines changed: 4 additions & 171 deletions
Original file line numberDiff line numberDiff line change
@@ -58,10 +58,6 @@ using std::min;
5858
* cost computation. 0.01 means that there is a 1% error tolerance. */
5959
#define ERROR_TOL .01
6060

61-
/* The final rlim (range limit) is 1, which is the smallest value that can *
62-
* still make progress, since an rlim of 0 wouldn't allow any swaps. */
63-
#define FINAL_RLIM 1
64-
6561
/* This defines the maximum number of swap attempts before invoking the *
6662
* once-in-a-while placement legality check as well as floating point *
6763
* variables round-offs check. */
@@ -92,104 +88,6 @@ struct t_placer_statistics {
9288
int success_sum;
9389
};
9490

95-
/**
96-
* @brief Data structure that stores different cost values in the placer.
97-
*
98-
* Although we do cost calculations with float values, we use doubles
99-
* for the accumulated costs to avoid round-off, particularly on large
100-
* designs where the magnitude of a single move's delta cost is small
101-
* compared to the overall cost.
102-
*
103-
* The cost normalization factors are updated upon every temperature change
104-
* in the outer_loop_update_timing_info routine. They are the multiplicative
105-
* inverses of their respective cost values when the routine is called. They
106-
* serve to normalize the trade-off between timing and wirelength (bb).
107-
*
108-
* @param cost The weighted average of the wiring cost and the timing cost.
109-
* @param bb_cost The bounding box cost, aka the wiring cost.
110-
* @param timing_cost The timing cost, which is connection delay * criticality.
111-
*
112-
* @param bb_cost_norm The normalization factor for the wiring cost.
113-
* @param timing_cost_norm The normalization factor for the timing cost, which
114-
* is upper-bounded by the value of MAX_INV_TIMING_COST.
115-
*
116-
* @param MAX_INV_TIMING_COST Stops inverse timing cost from going to infinity
117-
* with very lax timing constraints, which avoids multiplying by a
118-
* gigantic timing_cost_norm when auto-normalizing. The exact value
119-
* of this cost has relatively little impact, but should not be large
120-
* enough to be on the order of timing costs for normal constraints.
121-
*
122-
* @param place_algorithm Determines how the member values are updated upon
123-
* each temperature change during the placer annealing process.
124-
*/
125-
class t_placer_costs {
126-
public:
127-
double cost;
128-
double bb_cost;
129-
double timing_cost;
130-
double bb_cost_norm;
131-
double timing_cost_norm;
132-
133-
private:
134-
static constexpr double MAX_INV_TIMING_COST = 1.e9;
135-
enum e_place_algorithm place_algorithm;
136-
137-
public:
138-
///@brief Constructor that takes in the current placer algorithm.
139-
t_placer_costs(enum e_place_algorithm algo)
140-
: place_algorithm(algo) {
141-
if (place_algorithm != PATH_TIMING_DRIVEN_PLACE) {
142-
VTR_ASSERT_MSG(
143-
place_algorithm == BOUNDING_BOX_PLACE,
144-
"Must pass a valid placer algorithm into the placer cost structure.");
145-
}
146-
}
147-
148-
/**
149-
* @brief Mutator: updates the norm factors in the outer loop.
150-
*
151-
* At each temperature change we update these values to be used
152-
* for normalizing the trade-off between timing and wirelength (bb)
153-
*/
154-
void update_norm_factors() {
155-
if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
156-
bb_cost_norm = 1 / bb_cost;
157-
//Prevent the norm factor from going to infinity
158-
timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST);
159-
cost = 1; //The value of cost will be reset to 1 if timing driven
160-
} else { //place_algorithm == BOUNDING_BOX_PLACE
161-
cost = bb_cost; //The cost value should be identical to the wirelength cost
162-
}
163-
}
164-
};
165-
166-
/**
167-
* @brief Stores variables that are used by the annealing process.
168-
*
169-
* This structure is updated by update_annealing_state() on each outer
170-
* loop iteration. It stores various important variables that need to
171-
* be accessed during the placement inner loop.
172-
*
173-
* @param t Temperature for simulated annealing.
174-
* @param rlim Range limit for block swaps.
175-
* @param inverse_delta_rlim Used to update crit_exponent.
176-
* @param alpha Temperature decay factor (multiplied each outer loop iteration).
177-
* @param restart_t Temperature used after restart due to minimum success ratio.
178-
* @param crit_exponent Used by timing-driven placement to "sharpen" the timing criticality.
179-
* @param move_lim_max Maximum block move limit.
180-
* @param move_lim Current block move limit.
181-
*/
182-
struct t_annealing_state {
183-
float t;
184-
float rlim;
185-
float inverse_delta_rlim;
186-
float alpha;
187-
float restart_t;
188-
float crit_exponent;
189-
int move_lim_max;
190-
int move_lim;
191-
};
192-
19391
struct t_placer_timing_update_mode {
19492
/* Determines if slacks/criticalities need to be updated */
19593
bool do_update_criticalities;
@@ -593,12 +491,8 @@ static void print_place_status(const size_t num_temps,
593491
size_t tot_moves);
594492
static void print_resources_utilization();
595493

596-
static void init_annealing_state(t_annealing_state* state, const t_annealing_sched& annealing_sched, float t, float rlim, int move_lim_max, float crit_exponent);
597-
598494
static e_place_algorithm get_placement_quench_algorithm(const t_placer_opts& placer_opts);
599495

600-
static int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched);
601-
602496
/*****************************************************************************/
603497
void try_place(const t_placer_opts& placer_opts,
604498
t_annealing_sched annealing_sched,
@@ -822,8 +716,8 @@ void try_place(const t_placer_opts& placer_opts,
822716
/* when trying to determine the starting temp for placement inner loop. */
823717
float first_t = HUGE_POSITIVE_FLOAT;
824718

825-
t_annealing_state state;
826-
init_annealing_state(&state, annealing_sched, first_t, first_rlim, first_move_lim, first_crit_exponent);
719+
/* Initialize annealing state variables */
720+
t_annealing_state state(annealing_sched, first_t, first_rlim, first_move_lim, first_crit_exponent);
827721

828722
/* Update the starting temperature for placement annealing to a more appropriate value */
829723
state.t = starting_t(&state,
@@ -1449,7 +1343,7 @@ static bool update_annealing_state(t_annealing_state* state,
14491343
update_rlim(&state->rlim, success_rat, device_ctx.grid);
14501344

14511345
if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
1452-
state->crit_exponent = (1 - (state->rlim - FINAL_RLIM) * state->inverse_delta_rlim)
1346+
state->crit_exponent = (1 - (state->rlim - state->final_rlim()) * state->inverse_delta_rlim)
14531347
* (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first)
14541348
+ placer_opts.td_place_exp_first;
14551349
}
@@ -3313,26 +3207,6 @@ static void print_resources_utilization() {
33133207
VTR_LOG("\n");
33143208
}
33153209

3316-
static void init_annealing_state(t_annealing_state* state,
3317-
const t_annealing_sched& annealing_sched,
3318-
float t,
3319-
float rlim,
3320-
int move_lim_max,
3321-
float crit_exponent) {
3322-
state->alpha = annealing_sched.alpha_min;
3323-
state->t = t;
3324-
state->restart_t = t;
3325-
state->rlim = rlim;
3326-
state->inverse_delta_rlim = 1 / (rlim - FINAL_RLIM);
3327-
state->move_lim_max = std::max(1, move_lim_max);
3328-
if (annealing_sched.type == DUSTY_SCHED) {
3329-
state->move_lim = std::max(1, (int)(state->move_lim_max * annealing_sched.success_target));
3330-
} else {
3331-
state->move_lim = state->move_lim_max;
3332-
}
3333-
state->crit_exponent = crit_exponent;
3334-
}
3335-
33363210
static e_place_algorithm get_placement_quench_algorithm(const t_placer_opts& placer_opts) {
33373211
e_place_algorithm place_algo = placer_opts.place_algorithm;
33383212
e_place_quench_metric quench_metric = placer_opts.place_quench_metric;
@@ -3352,45 +3226,4 @@ static e_place_algorithm get_placement_quench_algorithm(const t_placer_opts& pla
33523226

33533227
bool placer_needs_lookahead(const t_vpr_setup& vpr_setup) {
33543228
return (vpr_setup.PlacerOpts.place_algorithm == PATH_TIMING_DRIVEN_PLACE);
3355-
}
3356-
3357-
/**
3358-
* @brief Get the initial block move attempt limit for the placement inner loop.
3359-
*
3360-
* There are two ways to scale the move limit.
3361-
* e_place_effort_scaling::CIRCUIT
3362-
* scales the move limit proportional to num_blocks ^ (4/3)
3363-
* e_place_effort_scaling::DEVICE_CIRCUIT
3364-
* scales the move limit proportional to device_size ^ (2/3) * num_blocks ^ (2/3)
3365-
*
3366-
* The second method is almost identical to the first one when the device
3367-
* is highly utilized (device_size ~ num_blocks). For low utilization devices
3368-
* (device_size >> num_blocks), the search space is larger, so the second method
3369-
* performs more moves to ensure better optimization.
3370-
*/
3371-
3372-
static int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched) {
3373-
const auto& device_ctx = g_vpr_ctx.device();
3374-
const auto& cluster_ctx = g_vpr_ctx.clustering();
3375-
3376-
auto device_size = device_ctx.grid.width() * device_ctx.grid.height();
3377-
auto num_blocks = cluster_ctx.clb_nlist.blocks().size();
3378-
3379-
int move_lim;
3380-
if (placer_opts.effort_scaling == e_place_effort_scaling::CIRCUIT) {
3381-
move_lim = int(annealing_sched.inner_num * pow(num_blocks, 4. / 3.));
3382-
} else {
3383-
VTR_ASSERT_MSG(
3384-
placer_opts.effort_scaling == e_place_effort_scaling::DEVICE_CIRCUIT,
3385-
"Unrecognized placer effort scaling");
3386-
3387-
move_lim = int(annealing_sched.inner_num * pow(device_size, 2. / 3.) * pow(num_blocks, 2. / 3.));
3388-
}
3389-
3390-
/* Avoid having a non-positive move_lim */
3391-
move_lim = std::max(move_lim, 1);
3392-
3393-
VTR_LOG("Moves per temperature: %d\n", move_lim);
3394-
3395-
return move_lim;
3396-
}
3229+
}

vpr/src/place/place_util.cpp

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,93 @@ static vtr::Matrix<t_grid_blocks> init_grid_blocks() {
2929

3030
return grid_blocks;
3131
}
32+
33+
///@brief Constructor: stores current placer algorithm.
34+
t_placer_costs::t_placer_costs(enum e_place_algorithm algo)
35+
: place_algorithm(algo) {
36+
if (place_algorithm != PATH_TIMING_DRIVEN_PLACE) {
37+
VTR_ASSERT_MSG(
38+
place_algorithm == BOUNDING_BOX_PLACE,
39+
"Must pass a valid placer algorithm into the placer cost structure.");
40+
}
41+
}
42+
43+
/**
44+
* @brief Mutator: updates the norm factors in the outer loop iteration.
45+
*
46+
* At each temperature change we update these values to be used
47+
* for normalizing the trade-off between timing and wirelength (bb)
48+
*/
49+
void t_placer_costs::update_norm_factors() {
50+
if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
51+
bb_cost_norm = 1 / bb_cost;
52+
//Prevent the norm factor from going to infinity
53+
timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST);
54+
cost = 1; //The value of cost will be reset to 1 if timing driven
55+
} else { //place_algorithm == BOUNDING_BOX_PLACE
56+
cost = bb_cost; //The cost value should be identical to the wirelength cost
57+
}
58+
}
59+
60+
///@brief Constructor: Initialize all annealing state variables.
61+
t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched,
62+
float first_t,
63+
float first_rlim,
64+
int first_move_lim,
65+
float first_crit_exponent) {
66+
alpha = annealing_sched.alpha_min;
67+
t = first_t;
68+
restart_t = first_t;
69+
rlim = first_rlim;
70+
inverse_delta_rlim = 1 / (first_rlim - FINAL_RLIM);
71+
move_lim_max = first_move_lim;
72+
crit_exponent = first_crit_exponent;
73+
74+
//Determine the current move_lim based on the schedule type
75+
if (annealing_sched.type == DUSTY_SCHED) {
76+
move_lim = std::max(1, (int)(move_lim_max * annealing_sched.success_target));
77+
} else {
78+
move_lim = move_lim_max;
79+
}
80+
}
81+
82+
/**
83+
* @brief Get the initial block move attempt limit for the placement inner loop.
84+
*
85+
* There are two ways to scale the move limit.
86+
* e_place_effort_scaling::CIRCUIT
87+
* scales the move limit proportional to num_blocks ^ (4/3)
88+
* e_place_effort_scaling::DEVICE_CIRCUIT
89+
* scales the move limit proportional to device_size ^ (2/3) * num_blocks ^ (2/3)
90+
*
91+
* The second method is almost identical to the first one when the device
92+
* is highly utilized (device_size ~ num_blocks). For low utilization devices
93+
* (device_size >> num_blocks), the search space is larger, so the second method
94+
* performs more moves to ensure better optimization.
95+
*/
96+
97+
int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched) {
98+
const auto& device_ctx = g_vpr_ctx.device();
99+
const auto& cluster_ctx = g_vpr_ctx.clustering();
100+
101+
auto device_size = device_ctx.grid.width() * device_ctx.grid.height();
102+
auto num_blocks = cluster_ctx.clb_nlist.blocks().size();
103+
104+
int move_lim;
105+
if (placer_opts.effort_scaling == e_place_effort_scaling::CIRCUIT) {
106+
move_lim = int(annealing_sched.inner_num * pow(num_blocks, 4. / 3.));
107+
} else {
108+
VTR_ASSERT_MSG(
109+
placer_opts.effort_scaling == e_place_effort_scaling::DEVICE_CIRCUIT,
110+
"Unrecognized placer effort scaling");
111+
112+
move_lim = int(annealing_sched.inner_num * pow(device_size, 2. / 3.) * pow(num_blocks, 2. / 3.));
113+
}
114+
115+
/* Avoid having a non-positive move_lim */
116+
move_lim = std::max(move_lim, 1);
117+
118+
VTR_LOG("Moves per temperature: %d\n", move_lim);
119+
120+
return move_lim;
121+
}

0 commit comments

Comments
 (0)