verilog-to-routing
diff --git a/‎vpr/src/place/place.cpp
Lines changed: 95 additions & 208 deletions b/‎vpr/src/place/place.cpp
Lines changed: 95 additions & 208 deletions
diff --git a/‎vpr/src/place/place_delay_model.cpp
Lines changed: 90 additions & 15 deletions b/‎vpr/src/place/place_delay_model.cpp
Lines changed: 90 additions & 15 deletions
diff --git a/‎vpr/src/place/place_delay_model.h
Lines changed: 55 additions & 18 deletions b/‎vpr/src/place/place_delay_model.h
Lines changed: 55 additions & 18 deletions
diff --git a/‎vpr/src/place/place_global.h
Lines changed: 34 additions & 0 deletions b/‎vpr/src/place/place_global.h
Lines changed: 34 additions & 0 deletions
diff --git a/‎vpr/src/place/place_util.cpp
Lines changed: 82 additions & 0 deletions b/‎vpr/src/place/place_util.cpp
Lines changed: 82 additions & 0 deletions
@@ -10,6 +10,8 @@
 #include "vtr_math.h"
 #include "vpr_error.h"
 
+#include "place_global.h"
+
 #ifdef VTR_ENABLE_CAPNPROTO
 #    include "capnp/serialize.h"
 #    include "place_delay_model.capnp.h"
@@ -18,10 +20,7 @@
 #    include "serdes_utils.h"
 #endif /* VTR_ENABLE_CAPNPROTO */
 
-/*
- * DeltaDelayModel
- */
-
+///@brief DeltaDelayModel methods.
 float DeltaDelayModel::delay(int from_x, int from_y, int /*from_pin*/, int to_x, int to_y, int /*to_pin*/) const {
     int delta_x = std::abs(from_x - to_x);
     int delta_y = std::abs(from_y - to_y);
@@ -46,9 +45,11 @@ void DeltaDelayModel::dump_echo(std::string filepath) const {
     vtr::fclose(f);
 }
 
-/*
- * OverrideDelayModel
- */
+const DeltaDelayModel* OverrideDelayModel::base_delay_model() const {
+    return base_delay_model_.get(); // Non-owning pointer: ownership stays with base_delay_model_.
+}
+
+///@brief OverrideDelayModel methods.
 float OverrideDelayModel::delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin) const {
     //First check to if there is an override delay value
     auto& device_ctx = g_vpr_ctx.device();
@@ -136,18 +137,14 @@ float OverrideDelayModel::get_delay_override(int from_type, int from_class, int
     return iter->second;
 }
 
-const DeltaDelayModel* OverrideDelayModel::base_delay_model() const {
-    return base_delay_model_.get();
-}
-
 void OverrideDelayModel::set_base_delay_model(std::unique_ptr<DeltaDelayModel> base_delay_model_obj) {
     base_delay_model_ = std::move(base_delay_model_obj);
 }
 
-// When writing capnp targetted serialization, always allow compilation when
-// VTR_ENABLE_CAPNPROTO=OFF.  Generally this means throwing an exception
-// instead.
-//
+/**
+ * When writing capnp targeted serialization, always allow compilation when
+ * VTR_ENABLE_CAPNPROTO=OFF. Generally this means throwing an exception instead.
+ */
 #ifndef VTR_ENABLE_CAPNPROTO
 
 #    define DISABLE_ERROR                              \
@@ -300,3 +297,81 @@ void OverrideDelayModel::write(const std::string& file) const {
 }
 
 #endif
+
+///@brief Build the placer delay model by forwarding every argument to compute_place_delay_model().
+std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(t_chan_width_dist chan_width_dist,
+                                                               const t_placer_opts& placer_opts,
+                                                               const t_router_opts& router_opts,
+                                                               t_det_routing_arch* det_routing_arch,
+                                                               std::vector<t_segment_inf>& segment_inf,
+                                                               const t_direct_inf* directs,
+                                                               const int num_directs) {
+    // The callee takes chan_width_dist fifth (after segment_inf), unlike this wrapper's parameter order.
+    return compute_place_delay_model(placer_opts, router_opts, det_routing_arch, segment_inf, chan_width_dist, directs, num_directs);
+}
+
+/**
+ * @brief Returns the delay of one point to point connection.
+ *
+ * Only estimate delay for signals routed through the inter-block routing network.
+ * TODO: How should we compute the delay for globals? "Global signals are assumed to have zero delay."
+ * @param delay_model Delay model queried for the source-to-sink estimate.
+ * @param net_id      Net whose connection is evaluated.
+ * @param ipin        Index of the sink pin within the net.
+ * @return The estimated delay, or 0 if the net is ignored.
+ */
+float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, ClusterNetId net_id, int ipin) {
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+    auto& place_ctx = g_vpr_ctx.placement();
+
+    float delay_source_to_sink = 0.;
+
+    if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
+        ClusterPinId source_pin = cluster_ctx.clb_nlist.net_driver(net_id);
+        ClusterPinId sink_pin = cluster_ctx.clb_nlist.net_pin(net_id, ipin);
+
+        ClusterBlockId source_block = cluster_ctx.clb_nlist.pin_block(source_pin);
+        ClusterBlockId sink_block = cluster_ctx.clb_nlist.pin_block(sink_pin);
+
+        int source_block_ipin = cluster_ctx.clb_nlist.pin_logical_index(source_pin);
+        int sink_block_ipin = cluster_ctx.clb_nlist.pin_logical_index(sink_pin);
+
+        int source_x = place_ctx.block_locs[source_block].loc.x;
+        int source_y = place_ctx.block_locs[source_block].loc.y;
+        int sink_x = place_ctx.block_locs[sink_block].loc.x;
+        int sink_y = place_ctx.block_locs[sink_block].loc.y;
+
+        /**
+         * This heuristic only considers delta_x and delta_y, a much better
+         * heuristic would be to create a more comprehensive lookup table.
+         *
+         * In particular this approach does not accurately capture the effect
+         * of fast carry-chain connections.
+         */
+        delay_source_to_sink = delay_model->delay(source_x, source_y, source_block_ipin, sink_x, sink_y, sink_block_ipin);
+        if (delay_source_to_sink < 0) {
+            // Arguments must follow the format string order: the %g delay value comes first.
+            VPR_ERROR(VPR_ERROR_PLACE,
+                      "in comp_td_single_connection_delay: Bad delay_source_to_sink value %g from %s (at %d,%d) to %s (at %d,%d)\n"
+                      "in comp_td_single_connection_delay: Delay is less than 0\n",
+                      delay_source_to_sink,
+                      block_type_pin_index_to_name(physical_tile_type(source_block), source_block_ipin).c_str(),
+                      source_x, source_y,
+                      block_type_pin_index_to_name(physical_tile_type(sink_block), sink_block_ipin).c_str(),
+                      sink_x, sink_y);
+        }
+    }
+
+    return (delay_source_to_sink);
+}
+
+///@brief Recompute every point to point delay and store it in the `connection_delay` matrix.
+void comp_td_connection_delays(const PlaceDelayModel* delay_model) {
+    const auto& netlist = g_vpr_ctx.clustering().clb_nlist;
+    for (auto net_id : netlist.nets()) {
+        // Start at index 1: entry 0 of each net is never written here.
+        for (size_t ipin = 1; ipin < netlist.net_pins(net_id).size(); ++ipin) {
+            connection_delay[net_id][ipin] = comp_td_single_connection_delay(delay_model, net_id, ipin);
+        }
+    }
+}
@@ -1,3 +1,9 @@
+/**
+ * @file
+ * @brief This file contains all the class and function declarations related to
+ *        the placer delay model. For implementations, see place_delay_model.cpp.
+ */
+
 #ifndef PLACE_DELAY_MODEL_H
 #define PLACE_DELAY_MODEL_H
 
@@ -20,38 +26,63 @@
 #    define ALWAYS_INLINE inline
 #endif
 
-//Abstract interface to a placement delay model
+///@brief Forward declarations.
+class PlaceDelayModel;
+
+///@brief Initialize the placer delay model (defined in place_delay_model.cpp).
+std::unique_ptr<PlaceDelayModel> alloc_lookups_and_delay_model(t_chan_width_dist chan_width_dist,
+                                                               const t_placer_opts& placer_opts,
+                                                               const t_router_opts& router_opts,
+                                                               t_det_routing_arch* det_routing_arch,
+                                                               std::vector<t_segment_inf>& segment_inf,
+                                                               const t_direct_inf* directs,
+                                                               const int num_directs);
+
+///@brief Returns the delay of one point to point connection.
+float comp_td_single_connection_delay(const PlaceDelayModel* delay_model, ClusterNetId net_id, int ipin);
+
+///@brief Recompute all point to point delays, updating `connection_delay` matrix.
+void comp_td_connection_delays(const PlaceDelayModel* delay_model);
+
+///@brief Abstract interface to a placement delay model.
 class PlaceDelayModel {
   public:
     virtual ~PlaceDelayModel() = default;
 
-    // Computes place delay model.
+    ///@brief Computes place delay model.
     virtual void compute(
         RouterDelayProfiler& route_profiler,
         const t_placer_opts& placer_opts,
         const t_router_opts& router_opts,
         int longest_length)
         = 0;
 
-    //Returns the delay estimate between the specified block pins
-    //
-    // Either compute or read methods must be invoked before invoking
-    // delay.
+    /**
+     * @brief Returns the delay estimate between the specified block pins.
+     *
+     * Either compute or read methods must be invoked before invoking delay.
+     */
     virtual float delay(int from_x, int from_y, int from_pin, int to_x, int to_y, int to_pin) const = 0;
 
-    //Dumps the delay model to an echo file
+    ///@brief Dumps the delay model to an echo file.
     virtual void dump_echo(std::string filename) const = 0;
 
-    // Write place delay model to specified file.
-    // May be unimplemented, in which case method should throw an exception.
+    /**
+     * @brief Write place delay model to specified file.
+     *
+     * May be unimplemented, in which case method should throw an exception.
+     */
     virtual void write(const std::string& file) const = 0;
 
-    // Read place delay model from specified file.
-    // May be unimplemented, in which case method should throw an exception.
+    /**
+     * @brief Read place delay model from specified file.
+     *
+     * May be unimplemented, in which case method should throw an exception.
+     */
     virtual void read(const std::string& file) = 0;
 };
 
-//A simple delay model based on the distance (delta) between block locations
+///@brief A simple delay model based on the distance (delta) between block locations.
 class DeltaDelayModel : public PlaceDelayModel {
   public:
     DeltaDelayModel() {}
@@ -109,10 +140,13 @@ class OverrideDelayModel : public PlaceDelayModel {
         short delta_x;
         short delta_y;
 
-        //A combination of ALWAYS_INLINE attribute and std::lexicographical_compare
-        //is required for operator< to be inlined by compiler.
-        //Proper inlining of the function reduces place time by around 5%.
-        //For more information: https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1225
+        /**
+         * A combination of ALWAYS_INLINE attribute and std::lexicographical_compare
+         * is required for operator< to be inlined by compiler. Proper inlining of the
+         * function reduces place time by around 5%.
+         *
+         * For more information: https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1225
+         */
         friend ALWAYS_INLINE bool operator<(const t_override& lhs, const t_override& rhs) {
             const short* left = reinterpret_cast<const short*>(&lhs);
             const short* right = reinterpret_cast<const short*>(&rhs);
@@ -123,8 +157,11 @@ class OverrideDelayModel : public PlaceDelayModel {
 
     vtr::flat_map2<t_override, float> delay_overrides_;
 
-    //operator< treats memory layout of t_override as an array of short
-    //this requires all members of t_override are shorts and there is no padding between members of t_override
+    /**
+     * operator< treats memory layout of t_override as an array of short.
+     * This requires all members of t_override are shorts and there is no
+     * padding between members of t_override.
+     */
     static_assert(sizeof(t_override) == sizeof(t_override::from_type) + sizeof(t_override::to_type) + sizeof(t_override::from_class) + sizeof(t_override::to_class) + sizeof(t_override::delta_x) + sizeof(t_override::delta_y), "Expect t_override to have a memory layout equivalent to an array of short (no padding)");
     static_assert(sizeof(t_override::from_type) == sizeof(short), "Expect all t_override data members to be shorts");
     static_assert(sizeof(t_override::to_type) == sizeof(short), "Expect all t_override data members to be shorts");
 
@@ -0,0 +1,34 @@
+/**
+ * @file
+ * @brief This file contains all the global data structures referenced across
+ *        multiple files in ./vpr/src/place.
+ *
+ * These global data structures were originally local to place.cpp, and they
+ * were referenced by a lot of routines local to place.cpp. However, to shorten
+ * the file size of place.cpp, these routines are moved to other files.
+ *
+ * Rather than lengthening the argument lists of the moved routines, the data
+ * structures are declared here so that they can be easily shared across
+ * different files.
+ *
+ * For detailed descriptions on what each data structure stores, please see
+ * place.cpp, where these variables are defined.
+ */
+
+#pragma once
+#include <vector>
+#include "vtr_vector.h"
+#include "vpr_net_pins_matrix.h"
+#include "timing_place.h"
+
+extern vtr::vector<ClusterNetId, double> net_cost, proposed_net_cost;
+extern vtr::vector<ClusterNetId, char> bb_updated_before;
+extern ClbNetPinsMatrix<float> connection_delay;
+extern ClbNetPinsMatrix<float> proposed_connection_delay;
+extern ClbNetPinsMatrix<float> connection_setup_slack;
+extern PlacerTimingCosts connection_timing_cost;
+extern ClbNetPinsMatrix<double> proposed_connection_timing_cost;
+extern vtr::vector<ClusterNetId, double> net_timing_cost;
+extern vtr::vector<ClusterNetId, t_bb> bb_coords, bb_num_on_edges;
+extern vtr::vector<ClusterNetId, t_bb> ts_bb_coord_new, ts_bb_edge_new;
+extern std::vector<ClusterNetId> ts_nets_to_update;
@@ -2,6 +2,7 @@
 #include "globals.h"
 
 static vtr::Matrix<t_grid_blocks> init_grid_blocks();
+static void update_rlim(float* rlim, float success_rat, const DeviceGrid& grid);
 
 void init_placement_context() {
     auto& place_ctx = g_vpr_ctx.mutable_placement();
@@ -119,3 +120,84 @@ int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sch
 
     return move_lim;
 }
+
+/**
+ * @brief Update the annealing state according to the annealing schedule selected.
+ *
+ *   USER_SCHED:  A manual fixed schedule with fixed alpha and exit criteria.
+ *   AUTO_SCHED:  A more sophisticated schedule where alpha varies based on success ratio.
+ *   DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio.
+ *                See doc/src/vpr/dusty_sa.rst for more details.
+ *
+ * @return true until the schedule is finished; false once its exit criterion is met.
+ */
+bool update_annealing_state(t_annealing_state* state,
+                            float success_rat,
+                            const t_placer_costs& costs,
+                            const t_placer_opts& placer_opts,
+                            const t_annealing_sched& annealing_sched) {
+    /* Return `false` when the exit criterion is met. */
+    if (annealing_sched.type == USER_SCHED) {
+        state->t *= annealing_sched.alpha_t;
+        return state->t >= annealing_sched.exit_t;
+    }
+
+    auto& device_ctx = g_vpr_ctx.device();
+    auto& cluster_ctx = g_vpr_ctx.clustering();
+
+    /* Automatic annealing schedule */
+    float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size();
+
+    if (annealing_sched.type == DUSTY_SCHED) {
+        bool restart_temp = state->t < t_exit || std::isnan(t_exit); //May get nan if there are no nets
+        if (success_rat < annealing_sched.success_min || restart_temp) {
+            if (state->alpha > annealing_sched.alpha_max) return false;
+            state->t = state->restart_t / sqrt(state->alpha); // Take a half step from the restart temperature.
+            state->alpha = 1.0 - ((1.0 - state->alpha) * annealing_sched.alpha_decay);
+        } else {
+            if (success_rat > annealing_sched.success_target) {
+                state->restart_t = state->t;
+            }
+            state->t *= state->alpha;
+        }
+        // Clamp to [1, move_lim_max] in floating point first: success_rat may be 0 (ratio = inf), and converting an out-of-range value to int is undefined.
+        state->move_lim = static_cast<int>(std::max<double>(1, std::min<double>(state->move_lim_max, state->move_lim_max * (annealing_sched.success_target / success_rat))));
+    } else { /* annealing_sched.type == AUTO_SCHED */
+        if (success_rat > 0.96) {
+            state->alpha = 0.5;
+        } else if (success_rat > 0.8) {
+            state->alpha = 0.9;
+        } else if (success_rat > 0.15 || state->rlim > 1.) {
+            state->alpha = 0.95;
+        } else {
+            state->alpha = 0.8;
+        }
+        state->t *= state->alpha;
+        // Must be duplicated to retain previous behavior
+        if (state->t < t_exit || std::isnan(t_exit)) return false;
+    }
+
+    update_rlim(&state->rlim, success_rat, device_ctx.grid);
+
+    // Gradually changes from the initial crit_exponent to the final crit_exponent based on how much the range limit has shrunk.
+    // The idea is that as the range limit shrinks (indicating we are fine-tuning a more optimized placement) we can focus more on a smaller number of critical connections, which a higher crit_exponent achieves.
+    if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) {
+        state->crit_exponent = (1 - (state->rlim - state->final_rlim()) * state->inverse_delta_rlim)
+                                   * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first)
+                               + placer_opts.td_place_exp_first;
+    }
+
+    return true;
+}
+
+/**
+ * @brief Update the range limit to keep the acceptance probability near 0.44.
+ *
+ * Use a floating point rlim to allow gradual transitions at low temps.
+ */
+static void update_rlim(float* rlim, float success_rat, const DeviceGrid& grid) {
+    const float max_rlim = std::max(grid.width() - 1, grid.height() - 1);
+    const float scaled_rlim = *rlim * (1. - 0.44 + success_rat);
+
+    *rlim = std::max(std::min(scaled_rlim, max_rlim), 1.f);
+}