Skip to content

Commit 38f25cc

Browse files
committed
Enhanced documentation for timing_place.*. Moved the chanx, chany 2D arrays to the placement global file. Also fixed a compilation issue with in-class static constexpr variables.
1 parent cc4488e commit 38f25cc

File tree

7 files changed

+358
-165
lines changed

7 files changed

+358
-165
lines changed

vpr/src/place/place.cpp

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,28 @@ vtr::vector<ClusterNetId, double> net_timing_cost;
186186
*/
187187
vtr::vector<ClusterNetId, t_bb> bb_coords, bb_num_on_edges;
188188

189+
/**
190+
* @brief 2D arrays used to precompute the inverse of the average
191+
* number of tracks per channel between [subhigh] and [sublow].
192+
*
193+
* Access them as chan?_place_cost_fac[subhigh][sublow].
194+
* They are used to speed up the computation of the cost function that
195+
* takes the length of the net bounding box in each dimension, divided
196+
* by the average number of tracks in that direction.
197+
*
198+
* For other cost functions they will never be used.
199+
*
200+
* @param chanx_place_cost_fac
201+
* 1st dimension index range: [0...device_ctx.grid.width()-2]
202+
* @param chany_place_cost_fac
203+
* 1st dimension index range: [0...device_ctx.grid.height()-2]
204+
*
205+
* For more detailed structure allocation process and index ranges, see
206+
* alloc_and_load_for_fast_cost_update().
207+
*/
208+
float** chanx_place_cost_fac;
209+
float** chany_place_cost_fac;
210+
189211
/**
190212
* @brief The following arrays are used by the try_swap function for speed.
191213
*
@@ -196,17 +218,6 @@ std::vector<ClusterNetId> ts_nets_to_update;
196218

197219
/********** End of definitions of variables in place_global.h **********/
198220

199-
/* The arrays below are used to precompute the inverse of the average *
200-
* number of tracks per channel between [subhigh] and [sublow]. Access *
201-
* them as chan?_place_cost_fac[subhigh][sublow]. They are used to *
202-
* speed up the computation of the cost function that takes the length *
203-
* of the net bounding box in each dimension, divided by the average *
204-
* number of tracks in that direction; for other cost functions they *
205-
* will never be used. *
206-
*/
207-
static float** chanx_place_cost_fac; //[0...device_ctx.grid.width()-2]
208-
static float** chany_place_cost_fac; //[0...device_ctx.grid.height()-2]
209-
210221
/* These file-scoped variables keep track of the number of swaps *
211222
* rejected, accepted or aborted. The total number of swap attempts *
212223
* is the sum of the three numbers. */
@@ -2813,4 +2824,4 @@ static e_place_algorithm get_placement_quench_algorithm(const t_placer_opts& pla
28132824

28142825
bool placer_needs_lookahead(const t_vpr_setup& vpr_setup) {
28152826
return (vpr_setup.PlacerOpts.place_algorithm == PATH_TIMING_DRIVEN_PLACE);
2816-
}
2827+
}

vpr/src/place/place_global.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,6 @@ extern ClbNetPinsMatrix<double> proposed_connection_timing_cost;
3131
extern vtr::vector<ClusterNetId, double> net_timing_cost;
3232
extern vtr::vector<ClusterNetId, t_bb> bb_coords, bb_num_on_edges;
3333
extern vtr::vector<ClusterNetId, t_bb> ts_bb_coord_new, ts_bb_edge_new;
34-
extern std::vector<ClusterNetId> ts_nets_to_update;
34+
extern float** chanx_place_cost_fac;
35+
extern float** chany_place_cost_fac;
36+
extern std::vector<ClusterNetId> ts_nets_to_update;

vpr/src/place/place_timing_update.cpp

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,26 @@ void initialize_timing_info(float crit_exponent,
9292
* All the pins with changed connection delays have already been added into
9393
* the ClusteredPinTimingInvalidator to allow incremental STA update. These
9494
* changed connection delays are a direct result of moved blocks in try_swap().
95+
*
96+
* @param crit_exponent Used to calculate `sharpened` criticalities.
97+
*
98+
* @param delay_model Used to calculate the delay between two locations.
99+
*
100+
* @param criticalities Mapping interface between atom pin criticalities
101+
* and clb pin criticalities.
102+
*
103+
* @param setup_slacks Mapping interface between atom pin raw setup slacks
104+
* and clb pin raw setup slacks.
105+
*
106+
* @param pin_timing_invalidator Stores all the pins that have their delay value changed
107+
* and needs to be updated in the timing graph.
108+
*
109+
* @param timing_info Stores the timing graph and other important timing info.
110+
*
111+
* @param timing_update_mode Determines what should be updated when this routine is
112+
* called, and whether using incremental techniques is appropriate.
113+
*
114+
* @param costs Stores the updated timing cost for the whole placement.
95115
*/
96116
void update_setup_slacks_and_criticalities(float crit_exponent,
97117
const PlaceDelayModel* delay_model,
@@ -284,9 +304,19 @@ static double sum_td_costs() {
284304
* @brief Commit all the setup slack values from the PlacerSetupSlacks
285305
* class to a vtr matrix.
286306
*
287-
* This incremental routine will be correct if and only if it is called
288-
* immediately after each time update_setup_slacks_and_criticalities
289-
* updates the setup slacks (i.e. update_setup_slacks = true).
307+
* This routine is incremental since it relies on the pins_with_modified_setup_slack()
308+
* to detect which pins need to be updated and which pins do not.
309+
*
310+
* Therefore, it is assumed that this routine is always called immediately after
311+
* each time update_setup_slacks_and_criticalities() updates the setup slacks
312+
* (i.e. t_placer_timing_update_mode::update_setup_slacks = true). Otherwise,
313+
* pins_with_modified_setup_slack() cannot accurately account for all the pins
314+
* that have their setup slacks changed, making this routine incorrect.
315+
*
316+
* Currently, the only exception to the rule above is when setup slack analysis is used
317+
* during the placement quench. The new setup slacks might be either accepted or
318+
* rejected, so for efficiency reasons, this routine is not called if the slacks are
319+
* rejected in the end. For more detailed info, see the try_swap() routine.
290320
*/
291321
void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks) {
292322
const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
@@ -305,6 +335,12 @@ void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks) {
305335
* @brief Verify that the values in the vtr matrix match the PlacerSetupSlacks class.
306336
*
307337
* Return true if all values are identical. Otherwise, return false.
338+
* Used to check if the timing update has been successfully reverted if a proposed move
339+
* is rejected when applying setup slack analysis during the placement quench.
340+
* If successful, the setup slacks in the timing analyzer should be the same as
341+
* the setup slacks in connection_setup_slack matrix without running commit_setup_slacks().
342+
*
343+
* For more detailed info, see the try_swap() routine.
308344
*/
309345
bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks) {
310346
const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;

vpr/src/place/place_util.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,29 @@
1+
/**
2+
* @file place_util.cpp
3+
* @brief Definitions of structure routines declared in place_util.h.
4+
*/
5+
16
#include "place_util.h"
27
#include "globals.h"
38

9+
/* File-scope routines. */
410
static vtr::Matrix<t_grid_blocks> init_grid_blocks();
511
static void update_rlim(float* rlim, float success_rat, const DeviceGrid& grid);
612

13+
///@brief Initialize the placement context.
714
void init_placement_context() {
815
auto& place_ctx = g_vpr_ctx.mutable_placement();
916
auto& cluster_ctx = g_vpr_ctx.clustering();
1017

18+
/* Initialize the lookup of CLB block positions */
1119
place_ctx.block_locs.clear();
1220
place_ctx.block_locs.resize(cluster_ctx.clb_nlist.blocks().size());
1321

22+
/* Initialize the reverse lookup of CLB block positions */
1423
place_ctx.grid_blocks = init_grid_blocks();
1524
}
1625

26+
///@brief Initialize `grid_blocks`, the inverse structure of `block_locs`.
1727
static vtr::Matrix<t_grid_blocks> init_grid_blocks() {
1828
auto& device_ctx = g_vpr_ctx.device();
1929

@@ -200,4 +210,4 @@ static void update_rlim(float* rlim, float success_rat, const DeviceGrid& grid)
200210

201211
*rlim *= (1. - 0.44 + success_rat);
202212
*rlim = std::max(std::min(*rlim, upper_lim), 1.f);
203-
}
213+
}

vpr/src/place/place_util.h

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
1+
/**
2+
* @file place_util.h
3+
* @brief Utility structures representing various states of the
4+
* placement. Also contains declarations of related routines.
5+
*/
6+
17
#pragma once
28
#include "vpr_types.h"
39

410
///@brief Forward declarations.
511
class t_placer_costs;
612
class t_annealing_state;
713

8-
///@brief Initialize the placement context
14+
///@brief Initialize the placement context.
915
void init_placement_context();
1016

1117
///@brief Get the initial value of the inner-loop block move attempt limit.
@@ -57,7 +63,7 @@ class t_placer_costs {
5763
double timing_cost_norm;
5864

5965
private:
60-
static constexpr double MAX_INV_TIMING_COST = 1.e9;
66+
double MAX_INV_TIMING_COST = 1.e9;
6167
enum e_place_algorithm place_algorithm;
6268

6369
public: //Constructor
@@ -98,7 +104,7 @@ class t_annealing_state {
98104
int move_lim;
99105

100106
private:
101-
static constexpr float FINAL_RLIM = 1.;
107+
float FINAL_RLIM = 1.;
102108

103109
public: //Constructor
104110
t_annealing_state(const t_annealing_sched& annealing_sched,
@@ -109,4 +115,4 @@ class t_annealing_state {
109115

110116
public: //Accessor
111117
float final_rlim() const { return FINAL_RLIM; }
112-
};
118+
};

0 commit comments

Comments
 (0)