Enhanced documentation for timing_place.*. Moved the chanx, chany 2D arrays to the placement global file. Also fixed a compilation issue with in-class static constexpr variables.

Bill-hbrhbr · Bill-hbrhbr · commit 38f25cc98cfb · 2020-08-25T03:46:10.000-04:00
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
@@ -186,6 +186,28 @@ vtr::vector<ClusterNetId, double> net_timing_cost;
  */
 vtr::vector<ClusterNetId, t_bb> bb_coords, bb_num_on_edges;
 
+/**
+ * @brief 2D arrays used to precompute the inverse of the average
+ *        number of tracks per channel between [subhigh] and [sublow].
+ *
+ * Access them as chan?_place_cost_fac[subhigh][sublow].
+ * They are used to speed up the computation of the cost function that
+ * takes the length of the net bounding box in each dimension, divided
+ * by the average number of tracks in that direction.
+ *
+ * For other cost functions they will never be used.
+ *
+ *   @param chanx_place_cost_fac
+ *              1st dimension index range: [0...device_ctx.grid.width()-2]
+ *   @param chany_place_cost_fac
+ *              1st dimension index range: [0...device_ctx.grid.height()-2]
+ *
+ * For more detailed structure allocation process and index ranges, see
+ * alloc_and_load_for_fast_cost_update().
+ */
+float** chanx_place_cost_fac;
+float** chany_place_cost_fac;
+
 /**
  * @brief The following arrays are used by the try_swap function for speed.
  *
@@ -196,17 +218,6 @@ std::vector<ClusterNetId> ts_nets_to_update;
 
 /********** End of definitions of variables in place_global.h **********/
 
-/* The arrays below are used to precompute the inverse of the average   *
- * number of tracks per channel between [subhigh] and [sublow].  Access *
- * them as chan?_place_cost_fac[subhigh][sublow].  They are used to     *
- * speed up the computation of the cost function that takes the length  *
- * of the net bounding box in each dimension, divided by the average    *
- * number of tracks in that direction; for other cost functions they    *
- * will never be used.                                                  *
- */
-static float** chanx_place_cost_fac; //[0...device_ctx.grid.width()-2]
-static float** chany_place_cost_fac; //[0...device_ctx.grid.height()-2]
-
 /* These file-scoped variables keep track of the number of swaps       *
  * rejected, accepted or aborted. The total number of swap attempts    *
  * is the sum of the three number.                                     */
@@ -2813,4 +2824,4 @@ static e_place_algorithm get_placement_quench_algorithm(const t_placer_opts& pla
 
 bool placer_needs_lookahead(const t_vpr_setup& vpr_setup) {
     return (vpr_setup.PlacerOpts.place_algorithm == PATH_TIMING_DRIVEN_PLACE);
-}
+}
diff --git a/vpr/src/place/place_global.h b/vpr/src/place/place_global.h
@@ -31,4 +31,6 @@ extern ClbNetPinsMatrix<double> proposed_connection_timing_cost;
 extern vtr::vector<ClusterNetId, double> net_timing_cost;
 extern vtr::vector<ClusterNetId, t_bb> bb_coords, bb_num_on_edges;
 extern vtr::vector<ClusterNetId, t_bb> ts_bb_coord_new, ts_bb_edge_new;
-extern std::vector<ClusterNetId> ts_nets_to_update;
+extern float** chanx_place_cost_fac;
+extern float** chany_place_cost_fac;
+extern std::vector<ClusterNetId> ts_nets_to_update;
diff --git a/vpr/src/place/place_timing_update.cpp b/vpr/src/place/place_timing_update.cpp
@@ -92,6 +92,26 @@ void initialize_timing_info(float crit_exponent,
  * All the pins with changed connection delays have already been added into
  * the ClusteredPinTimingInvalidator to allow incremental STA update. These
  * changed connection delays are a direct result of moved blocks in try_swap().
+ *
+ * @param crit_exponent            Used to calculate `sharpened` criticalities.
+ *
+ * @param delay_model              Used to calculate the delay between two locations.
+ *
+ * @param criticalities            Mapping interface between atom pin criticalities
+ *                                 and clb pin criticalities.
+ *
+ * @param setup_slacks             Mapping interface between atom pin raw setup slacks
+ *                                 and clb pin raw setup slacks.
+ *
+ * @param pin_timing_invalidator   Stores all the pins that have their delay value changed
+ *                                 and need to be updated in the timing graph.
+ *
+ * @param timing_info              Stores the timing graph and other important timing info.
+ *
+ * @param timing_update_mode       Determines what should be updated when this routine is
+ *                                 called, and whether using incremental techniques is appropriate.
+ *
+ * @param costs                    Stores the updated timing cost for the whole placement.
  */
 void update_setup_slacks_and_criticalities(float crit_exponent,
                                            const PlaceDelayModel* delay_model,
@@ -284,9 +304,19 @@ static double sum_td_costs() {
  * @brief Commit all the setup slack values from the PlacerSetupSlacks
  *        class to a vtr matrix.
  *
- * This incremental routine will be correct if and only if it is called
- * immediately after each time update_setup_slacks_and_criticalities
- * updates the setup slacks (i.e. update_setup_slacks = true).
+ * This routine is incremental since it relies on the pins_with_modified_setup_slack()
+ * to detect which pins need to be updated and which pins do not.
+ *
+ * Therefore, it is assumed that this routine is always called immediately after
+ * each time update_setup_slacks_and_criticalities() updates the setup slacks
+ * (i.e. t_placer_timing_update_mode::update_setup_slacks = true). Otherwise,
+ * pins_with_modified_setup_slack() cannot accurately account for all the pins
+ * that have their setup slacks changed, making this routine incorrect.
+ *
+ * Currently, the only exception to the rule above is when setup slack analysis is used
+ * during the placement quench. The new setup slacks might be either accepted or
+ * rejected, so for efficiency reasons, this routine is not called if the slacks are
+ * rejected in the end. For more detailed info, see the try_swap() routine.
  */
 void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks) {
     const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
@@ -305,6 +335,12 @@ void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks) {
  * @brief Verify that the values in the vtr matrix matches the PlacerSetupSlacks class.
  *
  * Return true if all values are identical. Otherwise, return false.
+ * Used to check if the timing update has been successfully reverted if a proposed move
+ * is rejected when applying setup slack analysis during the placement quench.
+ * If successful, the setup slacks in the timing analyzer should be the same as
+ * the setup slacks in connection_setup_slack matrix without running commit_setup_slacks().
+ *
+ * For more detailed info, see the try_swap() routine.
  */
 bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks) {
     const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
@@ -1,19 +1,29 @@
+/**
+ * @file place_util.cpp
+ * @brief Definitions of structure routines declared in place_util.h.
+ */
+
 #include "place_util.h"
 #include "globals.h"
 
+/* File-scope routines */
 static vtr::Matrix<t_grid_blocks> init_grid_blocks();
 static void update_rlim(float* rlim, float success_rat, const DeviceGrid& grid);
 
+///@brief Initialize the placement context.
 void init_placement_context() {
     auto& place_ctx = g_vpr_ctx.mutable_placement();
     auto& cluster_ctx = g_vpr_ctx.clustering();
 
+    /* Initialize the lookup of CLB block positions */
     place_ctx.block_locs.clear();
     place_ctx.block_locs.resize(cluster_ctx.clb_nlist.blocks().size());
 
+    /* Initialize the reverse lookup of CLB block positions */
     place_ctx.grid_blocks = init_grid_blocks();
 }
 
+///@brief Initialize `grid_blocks`, the inverse structure of `block_locs`.
 static vtr::Matrix<t_grid_blocks> init_grid_blocks() {
     auto& device_ctx = g_vpr_ctx.device();
 
@@ -200,4 +210,4 @@ static void update_rlim(float* rlim, float success_rat, const DeviceGrid& grid)
 
     *rlim *= (1. - 0.44 + success_rat);
     *rlim = std::max(std::min(*rlim, upper_lim), 1.f);
-}
+}
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
@@ -1,11 +1,17 @@
+/**
+ * @file place_util.h
+ * @brief Utility structures representing various states of the
+ *        placement. Also contains declarations of related routines.
+ */
+
 #pragma once
 #include "vpr_types.h"
 
 ///@brief Forward declarations.
 class t_placer_costs;
 class t_annealing_state;
 
-///@brief Initialize the placement context
+///@brief Initialize the placement context.
 void init_placement_context();
 
 ///@brief Get the initial limit for inner loop block move attempt limit.
@@ -57,7 +63,7 @@ class t_placer_costs {
     double timing_cost_norm;
 
   private:
-    static constexpr double MAX_INV_TIMING_COST = 1.e9;
+    double MAX_INV_TIMING_COST = 1.e9;
     enum e_place_algorithm place_algorithm;
 
   public: //Constructor
@@ -98,7 +104,7 @@ class t_annealing_state {
     int move_lim;
 
   private:
-    static constexpr float FINAL_RLIM = 1.;
+    float FINAL_RLIM = 1.;
 
   public: //Constructor
     t_annealing_state(const t_annealing_sched& annealing_sched,
@@ -109,4 +115,4 @@ class t_annealing_state {
 
   public: //Accessor
     float final_rlim() const { return FINAL_RLIM; }
-};
+};
diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp
diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h