From f4ea4a15c6701b6f4b4ace9f0ee3ff83d0c36d75 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 23 Jul 2020 18:24:46 -0400 Subject: [PATCH 01/24] Added interface for mapping between CLB pins and setup slacks. Refactored PlacerCriticalities, and created PlacerSetupSlacks, so that they can choose between doing incremental V.S. from scratch updates. --- vpr/src/place/place.cpp | 8 +- vpr/src/place/timing_place.cpp | 174 ++++++++++++++++++++++++--------- vpr/src/place/timing_place.h | 68 ++++++++++++- vpr/src/timing/timing_util.cpp | 17 ++++ 4 files changed, 215 insertions(+), 52 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index ef6f0ba8c74..b9ceb07e04f 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -164,6 +164,10 @@ static vtr::vector net_timing_cost; //Like connection_timi static vtr::vector bb_coords, bb_num_on_edges; +/* Determines if slacks/criticalities need to be recomputed from scratch */ +static bool do_recompute_criticalities = false; +static bool do_recompute_slacks = false; + /* The arrays below are used to precompute the inverse of the average * * number of tracks per channel between [subhigh] and [sublow]. Access * * them as chan?_place_cost_fac[subhigh][sublow]. They are used to * @@ -1103,7 +1107,7 @@ static void placement_inner_loop(float t, /* Lines below prevent too much round-off error from accumulating * in the cost over many iterations (due to incremental updates). - * This round-off can lead to error checks failing because the cost + * This round-off can lead to error checks failing because the cost * is different from what you get when you recompute from scratch. 
*/ ++(*moves_since_cost_recompute); @@ -1894,7 +1898,7 @@ static void update_td_costs(const PlaceDelayModel* delay_model, const PlacerCrit if (cluster_ctx.clb_nlist.net_is_ignored(clb_net)) continue; int ipin = clb_nlist.pin_net_index(clb_pin); - VTR_ASSERT_SAFE(ipin >= 0 && ipin < int(clb_nlist.net_pins(clb_net).size())); + VTR_ASSERT_SAFE(ipin >= 1 && ipin < int(clb_nlist.net_pins(clb_net).size())); double new_timing_cost = comp_td_connection_cost(delay_model, place_crit, clb_net, ipin); diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp index e62eab6c894..3043c0e1089 100644 --- a/vpr/src/place/timing_place.cpp +++ b/vpr/src/place/timing_place.cpp @@ -14,8 +14,9 @@ #include "timing_info.h" -//Use an incremental approach to updaing criticalities? +//Use an incremental approach to updating criticalities and setup slacks? constexpr bool INCR_UPDATE_CRITICALITIES = true; +constexpr bool INCR_UPDATE_SETUP_SLACKS = true; /**************************************/ @@ -27,58 +28,29 @@ PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, cons , timing_place_crit_(make_net_pins_matrix(clb_nlist_, std::numeric_limits::quiet_NaN())) { } -/**************************************/ -void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_info, float crit_exponent) { - /* Performs a 1-to-1 mapping from criticality to timing_place_crit_. - * For every pin on every net (or, equivalently, for every tedge ending - * in that pin), timing_place_crit_ = criticality^(criticality exponent) */ +void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_info, float crit_exponent, bool recompute) { + //If the criticalities are not updated immediately after each time we call + //timing_info->update(), then timing_info->pins_with_modified_setup_criticality() + //cannot accurately account for all the pins that need to be updated. 
+ //In this case, we pass in recompute=true to update all criticalities from scratch. + // + //If the criticality exponent has changed, we also need to update from scratch. //Determine what pins need updating - if (INCR_UPDATE_CRITICALITIES) { - cluster_pins_with_modified_criticality_.clear(); - if (crit_exponent != last_crit_exponent_) { - //Criticality exponent changed, must re-calculate criticalities for *all* sink pins - for (ClusterNetId net_id : clb_nlist_.nets()) { - for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { - cluster_pins_with_modified_criticality_.insert(pin_id); - } - } - - //Record new criticality exponent - last_crit_exponent_ = crit_exponent; - } else { - //Criticality exponent unchanged - // - //Collect the cluster pins which need to be updated based on the latest timing - //analysis - // - //Note we use the set of pins reported by the *timing_info* as having modified - //criticality, rather than those marked as modified by the timing analyzer. - //Since timing_info uses shifted/relaxed criticality (which depends on max - //required time and worst case slacks), additional nodes may be modified - //when updating the atom pin criticalities. - - for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_criticality()) { - ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); - - //Some atom pins correspond to connections which are completely - //contained within a cluster, and hence have no corresponding - //clustered pin. 
- if (!clb_pin) continue; - - cluster_pins_with_modified_criticality_.insert(clb_pin); - } - } + if (!recompute && crit_exponent == last_crit_exponent_ && INCR_UPDATE_CRITICALITIES) { + incr_update_criticalities(timing_info); } else { - //Non-incremental: all pins and nets need updating - for (ClusterNetId net_id : clb_nlist_.nets()) { - for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { - cluster_pins_with_modified_criticality_.insert(pin_id); - } - } + recompute_criticalities(timing_info); + + //Record new criticality exponent + last_crit_exponent_ = crit_exponent; } - //Update the effected pins + /* Performs a 1-to-1 mapping from criticality to timing_place_crit_. + * For every pin on every net (or, equivalently, for every tedge ending + * in that pin), timing_place_crit_ = criticality^(criticality exponent) */ + + // Update the effected pins for (ClusterPinId clb_pin : cluster_pins_with_modified_criticality_) { ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); @@ -92,6 +64,41 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf } } +void PlacerCriticalities::incr_update_criticalities(const SetupTimingInfo* timing_info) { + cluster_pins_with_modified_criticality_.clear(); + + //Collect the cluster pins which need to be updated based on the latest timing + //analysis + // + //Note we use the set of pins reported by the *timing_info* as having modified + //criticality, rather than those marked as modified by the timing analyzer. + //Since timing_info uses shifted/relaxed criticality (which depends on max + //required time and worst case slacks), additional nodes may be modified + //when updating the atom pin criticalities. 
+ + for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_criticality()) { + ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); + + //Some atom pins correspond to connections which are completely + //contained within a cluster, and hence have no corresponding + //clustered pin. + if (!clb_pin) continue; + + cluster_pins_with_modified_criticality_.insert(clb_pin); + } +} + +void PlacerCriticalities::recompute_criticalities(const SetupTimingInfo* timing_info) { + cluster_pins_with_modified_criticality_.clear(); + + //Non-incremental: all sink pins need updating + for (ClusterNetId net_id : clb_nlist_.nets()) { + for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { + cluster_pins_with_modified_criticality_.insert(pin_id); + } + } +} + void PlacerCriticalities::set_criticality(ClusterNetId net_id, int ipin, float val) { timing_place_crit_[net_id][ipin] = val; } @@ -100,6 +107,77 @@ PlacerCriticalities::pin_range PlacerCriticalities::pins_with_modified_criticali return vtr::make_range(cluster_pins_with_modified_criticality_); } +/**************************************/ + +/* Allocates space for the timing_place_setup_slacks_ data structure */ +PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup) + : clb_nlist_(clb_nlist) + , pin_lookup_(netlist_pin_lookup) + , timing_place_setup_slacks_(make_net_pins_matrix(clb_nlist_, std::numeric_limits::quiet_NaN())) { +} + +void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info, bool recompute) { + //If the setup slacks are not updated immediately after each time we call + //timing_info->update(), then timing_info->pins_with_modified_setup_slack() + //cannot accurately account for all the pins that need to be updated. + //In this case, we pass in recompute=true to update all setup slacks from scratch. 
+ if (!recompute && INCR_UPDATE_SETUP_SLACKS) { + incr_update_setup_slacks(timing_info); + } else { + recompute_setup_slacks(timing_info); + } + + //Update the effected pins + for (ClusterPinId clb_pin : cluster_pins_with_modified_setup_slack_) { + ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); + int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); + + float clb_pin_setup_slack = calculate_clb_net_pin_setup_slack(*timing_info, pin_lookup_, clb_pin); + + timing_place_setup_slacks_[clb_net][pin_index_in_net] = clb_pin_setup_slack; + } +} + +void PlacerSetupSlacks::incr_update_setup_slacks(const SetupTimingInfo* timing_info) { + cluster_pins_with_modified_setup_slack_.clear(); + + //Collect the cluster pins which need to be updated based on the latest timing analysis + // + //Note we use the set of pins reported by the *timing_info* as having modified + //setup slacks, rather than those marked as modified by the timing analyzer. + for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_slack()) { + ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); + + //Some atom pins correspond to connections which are completely + //contained within a cluster, and hence have no corresponding + //clustered pin. 
+ if (!clb_pin) continue; + + cluster_pins_with_modified_setup_slack_.insert(clb_pin); + } +} + +void PlacerSetupSlacks::recompute_setup_slacks(const SetupTimingInfo* timing_info) { + cluster_pins_with_modified_setup_slack_.clear(); + + //Non-incremental: all sink pins need updating + for (ClusterNetId net_id : clb_nlist_.nets()) { + for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { + cluster_pins_with_modified_setup_slack_.insert(pin_id); + } + } +} + +void PlacerSetupSlacks::set_setup_slack(ClusterNetId net_id, int ipin, float val) { + timing_place_setup_slacks_[net_id][ipin] = val; +} + +PlacerSetupSlacks::pin_range PlacerSetupSlacks::pins_with_modified_setup_slack() const { + return vtr::make_range(cluster_pins_with_modified_setup_slack_); +} + +/**************************************/ + std::unique_ptr alloc_lookups_and_criticalities(t_chan_width_dist chan_width_dist, const t_placer_opts& placer_opts, const t_router_opts& router_opts, diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h index c3d8a41c3a1..fff1c6ab5f1 100644 --- a/vpr/src/place/timing_place.h +++ b/vpr/src/place/timing_place.h @@ -63,9 +63,9 @@ class PlacerCriticalities { pin_range pins_with_modified_criticality() const; public: //Modifiers - //Incrementally updates criticalities based on the atom netlist criticalitites provied by + //Updates criticalities based on the atom netlist criticalitites provided by //timing_info and the provided criticality_exponent. 
- void update_criticalities(const SetupTimingInfo* timing_info, float criticality_exponent); + void update_criticalities(const SetupTimingInfo* timing_info, float criticality_exponent, bool recompute); //Override the criticality of a particular connection void set_criticality(ClusterNetId net, int ipin, float val); @@ -81,6 +81,70 @@ class PlacerCriticalities { //Set of pins with criticaltites modified by last call to update_criticalities() vtr::vec_id_set cluster_pins_with_modified_criticality_; + + //Updates criticalities: incremental V.S. from scratch + void incr_update_criticalities(const SetupTimingInfo* timing_info); + void recompute_criticalities(const SetupTimingInfo* timing_info); +}; + +/* Usage + * ===== + * PlacerSetupSlacks returns the clustered netlist connection setup slack used by + * the placer. This also serves to map atom netlist level slack (i.e. on AtomPinIds) + * to the clustered netlist (i.e. ClusterPinIds) used during placement. + * + * Setup slacks are calculated by calling update_setup_slacks(), which will + * update setup slacks based on the atom netlist connection setup slacks provided by + * the passed in SetupTimingInfo. This is done incrementally, based on the modified + * connections/AtomPinIds returned by SetupTimingInfo. + * + * The setup slacks of individual connections can then be queried by calling the + * setup_slack() member function. + * + * It also supports iterating via pins_with_modified_setup_slack() through the + * clustered netlist pins/connections which have had their setup slacks modified by + * the last call to update_setup_slacks(). 
+ */ +class PlacerSetupSlacks { + public: //Types + typedef vtr::vec_id_set::iterator pin_iterator; + typedef vtr::vec_id_set::iterator net_iterator; + + typedef vtr::Range pin_range; + typedef vtr::Range net_range; + + public: //Lifetime + PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup); + PlacerSetupSlacks(const PlacerSetupSlacks& clb_nlist) = delete; + PlacerSetupSlacks& operator=(const PlacerSetupSlacks& clb_nlist) = delete; + + public: //Accessors + //Returns the setup slack of the specified connection + float setup_slack(ClusterNetId net, int ipin) const { return timing_place_setup_slack_[net][ipin]; } + + //Returns the range of clustered netlist pins (i.e. ClusterPinIds) which were modified + //by the last call to update_setup_slacks() + pin_range pins_with_modified_setup_slack() const; + + public: //Modifiers + //Updates setup slacks based on the atom netlist setup slacks provided by timing_info + void update_setup_slacks(const SetupTimingInfo* timing_info, bool recompute); + + //Override the setup slack of a particular connection + void set_setup_slack(ClusterNetId net, int ipin, float val); + + private: //Data + const ClusteredNetlist& clb_nlist_; + const ClusteredPinAtomPinsLookup& pin_lookup_; + + ClbNetPinsMatrix timing_place_setup_slacks_; /* [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] */ + + //Set of pins with criticaltites modified by last call to update_criticalities() + vtr::vec_id_set cluster_pins_with_modified_setup_slack_; + + //Updates setup slacks: incremental V.S. 
from scratch + void incr_update_setup_slacks(const SetupTimingInfo* timing_info); + void recompute_setup_slacks(const SetupTimingInfo* timing_info); }; /* Usage diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp index 6dd2c06d249..6ad86f4ca43 100644 --- a/vpr/src/timing/timing_util.cpp +++ b/vpr/src/timing/timing_util.cpp @@ -579,6 +579,23 @@ float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info, cons return clb_pin_crit; } +//Return the slack of a net's pin in the CLB netlist +float calculate_clb_net_pin_setup_slack(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, ClusterPinId clb_pin) { + //There may be multiple atom netlist pins connected to this CLB pin + float clb_pin_setup_slack = std::numeric_limits::quiet_NaN(); + + for (const auto atom_pin : pin_lookup.connected_atom_pins(clb_pin)) { + //Take the worst of the atom pin slacks as the CLB pin slack + if (std::isnan(clb_pin_setup_slack)) { + clb_pin_setup_slack = timing_info.setup_pin_slack(atom_pin); + } else { + clb_pin_setup_slack = std::min(clb_pin_setup_slack, timing_info.setup_pin_slack(atom_pin)); + } + } + + return clb_pin_setup_slack; +} + //Returns the worst (maximum) criticality of the set of slack tags specified. Requires the maximum //required time and worst slack for all domain pairs represent by the slack tags // From c024603536cc9fad343065975bb802561051b4bc Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 23 Jul 2020 19:12:25 -0400 Subject: [PATCH 02/24] Refactored criticalities update in place.cpp and added setup slacks update. 
Added checks to see if the updates need to be done from scratch or can be done incrementally --- vpr/src/place/place.cpp | 246 ++++++++++++++++++++++++++++------------ 1 file changed, 172 insertions(+), 74 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index b9ceb07e04f..d52d4d4d6e7 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -165,8 +165,8 @@ static vtr::vector net_timing_cost; //Like connection_timi static vtr::vector bb_coords, bb_num_on_edges; /* Determines if slacks/criticalities need to be recomputed from scratch */ -static bool do_recompute_criticalities = false; -static bool do_recompute_slacks = false; +static bool do_recompute_criticalities = true; +static bool do_recompute_setup_slacks = true; /* The arrays below are used to precompute the inverse of the average * * number of tracks per channel between [subhigh] and [sublow]. Access * @@ -417,16 +417,16 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr); static void free_try_swap_arrays(); -static void outer_loop_recompute_criticalities(const t_placer_opts& placer_opts, - t_placer_costs* costs, - t_placer_prev_inverse_costs* prev_inverse_costs, - int num_connections, - float crit_exponent, - int* outer_crit_iter_count, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info); +static void outer_loop_update_criticalities(const t_placer_opts& placer_opts, + t_placer_costs* costs, + t_placer_prev_inverse_costs* prev_inverse_costs, + int num_connections, + float crit_exponent, + int* outer_crit_iter_count, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info); static void recompute_criticalities(float crit_exponent, const PlaceDelayModel* delay_model, @@ -435,6 +435,25 @@ static void recompute_criticalities(float 
crit_exponent, SetupTimingInfo* timing_info, t_placer_costs* costs); +static void update_setup_slacks(PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info); + +static void update_criticalities(float crit_exponent, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info, + t_placer_costs* costs); + +static void update_setup_slacks_and_criticalities(float crit_exponent, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info, + t_placer_costs* costs); + static void placement_inner_loop(float t, int temp_num, float rlim, @@ -590,6 +609,8 @@ void try_place(const t_placer_opts& placer_opts, timing_info = make_setup_timing_info(placement_delay_calc, placer_opts.timing_update_type); + placer_setup_slacks = std::make_unique(cluster_ctx.clb_nlist, netlist_pin_lookup); + placer_criticalities = std::make_unique(cluster_ctx.clb_nlist, netlist_pin_lookup); pin_timing_invalidator = std::make_unique(cluster_ctx.clb_nlist, @@ -598,12 +619,12 @@ void try_place(const t_placer_opts& placer_opts, atom_ctx.lookup, *timing_info->timing_graph()); //Update timing and costs - recompute_criticalities(crit_exponent, - place_delay_model.get(), - placer_criticalities.get(), - pin_timing_invalidator.get(), - timing_info.get(), - &costs); + update_criticalities(crit_exponent, + place_delay_model.get(), + placer_criticalities.get(), + pin_timing_invalidator.get(), + timing_info.get(), + &costs); timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during placement @@ -754,14 +775,14 @@ void try_place(const t_placer_opts& placer_opts, costs.cost = 1; } - outer_loop_recompute_criticalities(placer_opts, &costs, &prev_inverse_costs, - 
num_connections, - crit_exponent, - &outer_crit_iter_count, - place_delay_model.get(), - placer_criticalities.get(), - pin_timing_invalidator.get(), - timing_info.get()); + outer_loop_update_criticalities(placer_opts, &costs, &prev_inverse_costs, + num_connections, + crit_exponent, + &outer_crit_iter_count, + place_delay_model.get(), + placer_criticalities.get(), + pin_timing_invalidator.get(), + timing_info.get()); placement_inner_loop(t, num_temps, rlim, placer_opts, move_lim, crit_exponent, inner_recompute_limit, &stats, @@ -818,15 +839,15 @@ void try_place(const t_placer_opts& placer_opts, { /* Quench */ vtr::ScopedFinishTimer temperature_timer("Placement Quench"); - outer_loop_recompute_criticalities(placer_opts, &costs, - &prev_inverse_costs, - num_connections, - crit_exponent, - &outer_crit_iter_count, - place_delay_model.get(), - placer_criticalities.get(), - pin_timing_invalidator.get(), - timing_info.get()); + outer_loop_update_criticalities(placer_opts, &costs, + &prev_inverse_costs, + num_connections, + crit_exponent, + &outer_crit_iter_count, + place_delay_model.get(), + placer_criticalities.get(), + pin_timing_invalidator.get(), + timing_info.get()); t = 0; /* freeze out */ @@ -892,12 +913,12 @@ void try_place(const t_placer_opts& placer_opts, VTR_ASSERT(timing_info); //Update timing and costs - recompute_criticalities(crit_exponent, - place_delay_model.get(), - placer_criticalities.get(), - pin_timing_invalidator.get(), - timing_info.get(), - &costs); + update_criticalities(crit_exponent, + place_delay_model.get(), + placer_criticalities.get(), + pin_timing_invalidator.get(), + timing_info.get(), + &costs); critical_path = timing_info->least_slack_critical_path(); @@ -950,17 +971,17 @@ void try_place(const t_placer_opts& placer_opts, VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n", f_update_td_costs_connections_elapsed_sec, f_update_td_costs_nets_elapsed_sec, f_update_td_costs_sum_nets_elapsed_sec, 
f_update_td_costs_total_elapsed_sec); } -/* Function to recompute the criticalities before the inner loop of the annealing */ -static void outer_loop_recompute_criticalities(const t_placer_opts& placer_opts, - t_placer_costs* costs, - t_placer_prev_inverse_costs* prev_inverse_costs, - int num_connections, - float crit_exponent, - int* outer_crit_iter_count, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info) { +/* Function to update the criticalities before the inner loop of the annealing */ +static void outer_loop_update_criticalities(const t_placer_opts& placer_opts, + t_placer_costs* costs, + t_placer_prev_inverse_costs* prev_inverse_costs, + int num_connections, + float crit_exponent, + int* outer_crit_iter_count, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info) { if (placer_opts.place_algorithm != PATH_TIMING_DRIVEN_PLACE) return; @@ -975,12 +996,13 @@ static void outer_loop_recompute_criticalities(const t_placer_opts& placer_opts, VTR_ASSERT(num_connections > 0); //Update timing information - recompute_criticalities(crit_exponent, - delay_model, - criticalities, - pin_timing_invalidator, - timing_info, - costs); + update_criticalities(crit_exponent, + delay_model, + criticalities, + pin_timing_invalidator, + timing_info, + &costs); + *outer_crit_iter_count = 0; } (*outer_crit_iter_count)++; @@ -992,19 +1014,45 @@ static void outer_loop_recompute_criticalities(const t_placer_opts& placer_opts, prev_inverse_costs->timing_cost = min(1 / costs->timing_cost, MAX_INV_TIMING_COST); } -//Update timing information based on current placement by running STA to get new slacks, -//and calculate updated criticalities and timing costs -static void recompute_criticalities(float crit_exponent, - const PlaceDelayModel* delay_model, - 
PlacerCriticalities* criticalities, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info, - t_placer_costs* costs) { +//Update timing information based on current placement by running STA +//and record the new setup slack information +static void update_setup_slacks(PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info) { + + //Run STA to update slacks and adjusted/relaxed criticalities + timing_info->update(); + + //Update placer's setup slacks + setup_slacks->update_setup_slacks(timing_info, do_recompute_setup_slacks); + + //Setup slacks are now in sync with the timing_info + //Can perform incremental updates next time + do_recompute_setup_slacks = false; + + //Criticalities are now out of sync with the timing_info + //Must do from scratch recompute next time + do_recompute_criticalities = true; + + //Clear invalidation state + pin_timing_invalidator->reset(); +} + +//Update timing information based on current placement by running STA +//and calculate the updated criticalities and timing costs +//(based on the new setup slacks) +static void update_criticalities(float crit_exponent, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info, + t_placer_costs* costs) { + //Run STA to update slacks and adjusted/relaxed criticalities timing_info->update(); - //Update placer'criticalities (e.g. sharpen with crit_exponent) - criticalities->update_criticalities(timing_info, crit_exponent); + //Update placer's criticalities (e.g. 
sharpen with crit_exponent) + criticalities->update_criticalities(timing_info, crit_exponent, do_recompute_criticalities); //Update connection, net and total timing costs based on new criticalities #ifdef INCR_COMP_TD_COSTS @@ -1013,10 +1061,59 @@ static void recompute_criticalities(float crit_exponent, comp_td_costs(delay_model, *criticalities, &costs->timing_cost); #endif + //Criticalities are now in sync with the timing_info + //Can perform incremental updates next time + do_recompute_criticalities = false; + + //Setup slacks are now out of sync with the timing_info + //Must do from scratch recompute next time + do_recompute_setup_slacks = true; + //Clear invalidation state pin_timing_invalidator->reset(); } +//Update timing information based on current placement by running STA. +//Record the new slack information as well as calculate the updated +//criticalities and timing costs (based on the new setup slacks) +// +//If both setup slacks and criticalities need to be updated, +//this routine should be called, instead of individual update routine. +//This is to prevent unnecessary from scratch updates +static void update_setup_slacks_and_criticalities(float crit_exponent, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info, + t_placer_costs* costs) { + + //Run STA to update slacks and adjusted/relaxed criticalities + timing_info->update(); + + //Update placer's setup slacks + setup_slacks->update_setup_slacks(timing_info, do_recompute_setup_slacks); + + //Update placer's criticalities (e.g. 
sharpen with crit_exponent) + criticalities->update_criticalities(timing_info, crit_exponent, do_recompute_criticalities); + + //Update connection, net and total timing costs based on new criticalities +#ifdef INCR_COMP_TD_COSTS + update_td_costs(delay_model, *criticalities, &costs->timing_cost); +#else + comp_td_costs(delay_model, *criticalities, &costs->timing_cost); +#endif + + //Both Setup slacks and Criticalities are now in sync with the timing_info + //They can be both incrementally updated next time + do_recompute_setup_slacks = false; + do_recompute_criticalities = false; + + //Clear invalidation state + pin_timing_invalidator->reset(); +} + + /* Function which contains the inner loop of the simulated annealing */ static void placement_inner_loop(float t, int temp_num, @@ -1088,12 +1185,13 @@ static void placement_inner_loop(float t, /* Using the delays in connection_delay, do a timing analysis to update slacks and * criticalities and update the timing cost since it will change. 
*/ - recompute_criticalities(crit_exponent, - delay_model, - criticalities, - pin_timing_invalidator, - timing_info, - costs); + //Update timing information + update_criticalities(crit_exponent, + delay_model, + criticalities, + pin_timing_invalidator, + timing_info, + costs); } inner_crit_iter_count++; } From cb6e9a6bf90f3cf2619d939a184593ca2b488f47 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 23 Jul 2020 19:41:56 -0400 Subject: [PATCH 03/24] Fix up format and compilation errors --- vpr/src/place/place.cpp | 22 ++++++++-------------- vpr/src/place/timing_place.cpp | 12 ++++++------ vpr/src/place/timing_place.h | 10 +++++----- vpr/src/timing/timing_util.cpp | 2 +- vpr/src/timing/timing_util.h | 3 +++ 5 files changed, 23 insertions(+), 26 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index d52d4d4d6e7..81a32b4acee 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -164,6 +164,10 @@ static vtr::vector net_timing_cost; //Like connection_timi static vtr::vector bb_coords, bb_num_on_edges; +/* Determines if slacks/criticalities need to be updated */ +static bool do_update_criticalities = true; +static bool do_update_setup_slacks = true; + /* Determines if slacks/criticalities need to be recomputed from scratch */ static bool do_recompute_criticalities = true; static bool do_recompute_setup_slacks = true; @@ -428,13 +432,6 @@ static void outer_loop_update_criticalities(const t_placer_opts& placer_opts, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info); -static void recompute_criticalities(float crit_exponent, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info, - t_placer_costs* costs); - static void update_setup_slacks(PlacerSetupSlacks* setup_slacks, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info); @@ -542,6 +539,7 @@ void 
try_place(const t_placer_opts& placer_opts, std::shared_ptr placement_delay_calc; std::unique_ptr place_delay_model; std::unique_ptr move_generator; + std::unique_ptr placer_setup_slacks; std::unique_ptr placer_criticalities; std::unique_ptr pin_timing_invalidator; @@ -1001,7 +999,7 @@ static void outer_loop_update_criticalities(const t_placer_opts& placer_opts, criticalities, pin_timing_invalidator, timing_info, - &costs); + costs); *outer_crit_iter_count = 0; } @@ -1019,7 +1017,6 @@ static void outer_loop_update_criticalities(const t_placer_opts& placer_opts, static void update_setup_slacks(PlacerSetupSlacks* setup_slacks, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info) { - //Run STA to update slacks and adjusted/relaxed criticalities timing_info->update(); @@ -1039,7 +1036,7 @@ static void update_setup_slacks(PlacerSetupSlacks* setup_slacks, } //Update timing information based on current placement by running STA -//and calculate the updated criticalities and timing costs +//and calculate the updated criticalities and timing costs //(based on the new setup slacks) static void update_criticalities(float crit_exponent, const PlaceDelayModel* delay_model, @@ -1047,7 +1044,6 @@ static void update_criticalities(float crit_exponent, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info, t_placer_costs* costs) { - //Run STA to update slacks and adjusted/relaxed criticalities timing_info->update(); @@ -1074,7 +1070,7 @@ static void update_criticalities(float crit_exponent, } //Update timing information based on current placement by running STA. 
-//Record the new slack information as well as calculate the updated +//Record the new slack information as well as calculate the updated //criticalities and timing costs (based on the new setup slacks) // //If both setup slacks and criticalities need to be updated, @@ -1087,7 +1083,6 @@ static void update_setup_slacks_and_criticalities(float crit_exponent, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info, t_placer_costs* costs) { - //Run STA to update slacks and adjusted/relaxed criticalities timing_info->update(); @@ -1113,7 +1108,6 @@ static void update_setup_slacks_and_criticalities(float crit_exponent, pin_timing_invalidator->reset(); } - /* Function which contains the inner loop of the simulated annealing */ static void placement_inner_loop(float t, int temp_num, diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp index 3043c0e1089..f7d940dfd5f 100644 --- a/vpr/src/place/timing_place.cpp +++ b/vpr/src/place/timing_place.cpp @@ -29,7 +29,7 @@ PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, cons } void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_info, float crit_exponent, bool recompute) { - //If the criticalities are not updated immediately after each time we call + //If the criticalities are not updated immediately after each time we call //timing_info->update(), then timing_info->pins_with_modified_setup_criticality() //cannot accurately account for all the pins that need to be updated. //In this case, we pass in recompute=true to update all criticalities from scratch. 
@@ -40,7 +40,7 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf if (!recompute && crit_exponent == last_crit_exponent_ && INCR_UPDATE_CRITICALITIES) { incr_update_criticalities(timing_info); } else { - recompute_criticalities(timing_info); + recompute_criticalities(); //Record new criticality exponent last_crit_exponent_ = crit_exponent; @@ -88,7 +88,7 @@ void PlacerCriticalities::incr_update_criticalities(const SetupTimingInfo* timin } } -void PlacerCriticalities::recompute_criticalities(const SetupTimingInfo* timing_info) { +void PlacerCriticalities::recompute_criticalities() { cluster_pins_with_modified_criticality_.clear(); //Non-incremental: all sink pins need updating @@ -117,14 +117,14 @@ PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const Cl } void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info, bool recompute) { - //If the setup slacks are not updated immediately after each time we call + //If the setup slacks are not updated immediately after each time we call //timing_info->update(), then timing_info->pins_with_modified_setup_slack() //cannot accurately account for all the pins that need to be updated. //In this case, we pass in recompute=true to update all setup slacks from scratch. 
if (!recompute && INCR_UPDATE_SETUP_SLACKS) { incr_update_setup_slacks(timing_info); } else { - recompute_setup_slacks(timing_info); + recompute_setup_slacks(); } //Update the effected pins @@ -157,7 +157,7 @@ void PlacerSetupSlacks::incr_update_setup_slacks(const SetupTimingInfo* timing_i } } -void PlacerSetupSlacks::recompute_setup_slacks(const SetupTimingInfo* timing_info) { +void PlacerSetupSlacks::recompute_setup_slacks() { cluster_pins_with_modified_setup_slack_.clear(); //Non-incremental: all sink pins need updating diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h index fff1c6ab5f1..d37983730f5 100644 --- a/vpr/src/place/timing_place.h +++ b/vpr/src/place/timing_place.h @@ -82,9 +82,9 @@ class PlacerCriticalities { //Set of pins with criticaltites modified by last call to update_criticalities() vtr::vec_id_set cluster_pins_with_modified_criticality_; - //Updates criticalities: incremental V.S. from scratch + //Updates criticalities: incremental V.S. from scratch void incr_update_criticalities(const SetupTimingInfo* timing_info); - void recompute_criticalities(const SetupTimingInfo* timing_info); + void recompute_criticalities(); }; /* Usage @@ -120,7 +120,7 @@ class PlacerSetupSlacks { public: //Accessors //Returns the setup slack of the specified connection - float setup_slack(ClusterNetId net, int ipin) const { return timing_place_setup_slack_[net][ipin]; } + float setup_slack(ClusterNetId net, int ipin) const { return timing_place_setup_slacks_[net][ipin]; } //Returns the range of clustered netlist pins (i.e. ClusterPinIds) which were modified //by the last call to update_setup_slacks() @@ -142,9 +142,9 @@ class PlacerSetupSlacks { //Set of pins with criticaltites modified by last call to update_criticalities() vtr::vec_id_set cluster_pins_with_modified_setup_slack_; - //Updates setup slacks: incremental V.S. from scratch + //Updates setup slacks: incremental V.S. 
from scratch void incr_update_setup_slacks(const SetupTimingInfo* timing_info); - void recompute_setup_slacks(const SetupTimingInfo* timing_info); + void recompute_setup_slacks(); }; /* Usage diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp index 6ad86f4ca43..5bff2ac8324 100644 --- a/vpr/src/timing/timing_util.cpp +++ b/vpr/src/timing/timing_util.cpp @@ -579,7 +579,7 @@ float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info, cons return clb_pin_crit; } -//Return the slack of a net's pin in the CLB netlist +//Return the setup slack of a net's pin in the CLB netlist float calculate_clb_net_pin_setup_slack(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, ClusterPinId clb_pin) { //There may be multiple atom netlist pins connected to this CLB pin float clb_pin_setup_slack = std::numeric_limits::quiet_NaN(); diff --git a/vpr/src/timing/timing_util.h b/vpr/src/timing/timing_util.h index 87f6b86787b..682771e9763 100644 --- a/vpr/src/timing/timing_util.h +++ b/vpr/src/timing/timing_util.h @@ -183,6 +183,9 @@ class ClusteredPinTimingInvalidator { //Return the criticality of a net's pin in the CLB netlist float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, ClusterPinId clb_pin); +//Return the setup slack of a net's pin in the CLB netlist +float calculate_clb_net_pin_setup_slack(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, ClusterPinId clb_pin); + //Returns the worst (maximum) criticality of the set of slack tags specified. 
Requires the maximum //required time and worst slack for all domain pairs represent by the slack tags // From 63db2e165d8bf822c7c3b94d26a71597870bab1b Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Fri, 31 Jul 2020 00:41:10 -0400 Subject: [PATCH 04/24] Merged 3 update routines into 1 single routine --- vpr/src/place/place.cpp | 166 ++++++++++++++-------------------------- 1 file changed, 59 insertions(+), 107 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 81a32b4acee..2dfff1fd311 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -429,20 +429,10 @@ static void outer_loop_update_criticalities(const t_placer_opts& placer_opts, int* outer_crit_iter_count, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info); -static void update_setup_slacks(PlacerSetupSlacks* setup_slacks, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info); - -static void update_criticalities(float crit_exponent, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info, - t_placer_costs* costs); - static void update_setup_slacks_and_criticalities(float crit_exponent, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, @@ -465,6 +455,7 @@ static void placement_inner_loop(float t, ClusteredPinTimingInvalidator* pin_timing_invalidator, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, MoveGenerator& move_generator, t_pl_blocks_to_be_moved& blocks_affected, SetupTimingInfo* timing_info); @@ -617,12 +608,15 @@ void try_place(const t_placer_opts& placer_opts, atom_ctx.lookup, *timing_info->timing_graph()); //Update timing and costs - update_criticalities(crit_exponent, - place_delay_model.get(), - 
placer_criticalities.get(), - pin_timing_invalidator.get(), - timing_info.get(), - &costs); + do_update_criticalities = true; + do_update_setup_slacks = false; + update_setup_slacks_and_criticalities(crit_exponent, + place_delay_model.get(), + placer_criticalities.get(), + placer_setup_slacks.get(), + pin_timing_invalidator.get(), + timing_info.get(), + &costs); timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during placement @@ -779,6 +773,7 @@ void try_place(const t_placer_opts& placer_opts, &outer_crit_iter_count, place_delay_model.get(), placer_criticalities.get(), + placer_setup_slacks.get(), pin_timing_invalidator.get(), timing_info.get()); @@ -790,6 +785,7 @@ void try_place(const t_placer_opts& placer_opts, pin_timing_invalidator.get(), place_delay_model.get(), placer_criticalities.get(), + placer_setup_slacks.get(), *move_generator, blocks_affected, timing_info.get()); @@ -844,6 +840,7 @@ void try_place(const t_placer_opts& placer_opts, &outer_crit_iter_count, place_delay_model.get(), placer_criticalities.get(), + placer_setup_slacks.get(), pin_timing_invalidator.get(), timing_info.get()); @@ -859,6 +856,7 @@ void try_place(const t_placer_opts& placer_opts, pin_timing_invalidator.get(), place_delay_model.get(), placer_criticalities.get(), + placer_setup_slacks.get(), *move_generator, blocks_affected, timing_info.get()); @@ -911,12 +909,15 @@ void try_place(const t_placer_opts& placer_opts, VTR_ASSERT(timing_info); //Update timing and costs - update_criticalities(crit_exponent, - place_delay_model.get(), - placer_criticalities.get(), - pin_timing_invalidator.get(), - timing_info.get(), - &costs); + do_update_criticalities = true; + do_update_setup_slacks = false; + update_setup_slacks_and_criticalities(crit_exponent, + place_delay_model.get(), + placer_criticalities.get(), + placer_setup_slacks.get(), + pin_timing_invalidator.get(), + timing_info.get(), + &costs); critical_path = 
timing_info->least_slack_critical_path(); @@ -978,6 +979,7 @@ static void outer_loop_update_criticalities(const t_placer_opts& placer_opts, int* outer_crit_iter_count, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info) { if (placer_opts.place_algorithm != PATH_TIMING_DRIVEN_PLACE) @@ -993,13 +995,16 @@ static void outer_loop_update_criticalities(const t_placer_opts& placer_opts, num_connections = std::max(num_connections, 1); //Avoid division by zero VTR_ASSERT(num_connections > 0); - //Update timing information - update_criticalities(crit_exponent, - delay_model, - criticalities, - pin_timing_invalidator, - timing_info, - costs); + //Update timing information and criticalities + do_update_criticalities = true; + do_update_setup_slacks = false; + update_setup_slacks_and_criticalities(crit_exponent, + delay_model, + criticalities, + setup_slacks, + pin_timing_invalidator, + timing_info, + costs); *outer_crit_iter_count = 0; } @@ -1012,70 +1017,9 @@ static void outer_loop_update_criticalities(const t_placer_opts& placer_opts, prev_inverse_costs->timing_cost = min(1 / costs->timing_cost, MAX_INV_TIMING_COST); } -//Update timing information based on current placement by running STA -//and record the new setup slack information -static void update_setup_slacks(PlacerSetupSlacks* setup_slacks, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info) { - //Run STA to update slacks and adjusted/relaxed criticalities - timing_info->update(); - - //Update placer's setup slacks - setup_slacks->update_setup_slacks(timing_info, do_recompute_setup_slacks); - - //Setup slacks are now in sync with the timing_info - //Can perform incremental updates next time - do_recompute_setup_slacks = false; - - //Criticalities are now out of sync with the timing_info - //Must do from scratch recompute next time - 
do_recompute_criticalities = true; - - //Clear invalidation state - pin_timing_invalidator->reset(); -} - -//Update timing information based on current placement by running STA -//and calculate the updated criticalities and timing costs -//(based on the new setup slacks) -static void update_criticalities(float crit_exponent, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info, - t_placer_costs* costs) { - //Run STA to update slacks and adjusted/relaxed criticalities - timing_info->update(); - - //Update placer's criticalities (e.g. sharpen with crit_exponent) - criticalities->update_criticalities(timing_info, crit_exponent, do_recompute_criticalities); - - //Update connection, net and total timing costs based on new criticalities -#ifdef INCR_COMP_TD_COSTS - update_td_costs(delay_model, *criticalities, &costs->timing_cost); -#else - comp_td_costs(delay_model, *criticalities, &costs->timing_cost); -#endif - - //Criticalities are now in sync with the timing_info - //Can perform incremental updates next time - do_recompute_criticalities = false; - - //Setup slacks are now out of sync with the timing_info - //Must do from scratch recompute next time - do_recompute_setup_slacks = true; - - //Clear invalidation state - pin_timing_invalidator->reset(); -} - //Update timing information based on current placement by running STA. //Record the new slack information as well as calculate the updated //criticalities and timing costs (based on the new setup slacks) -// -//If both setup slacks and criticalities need to be updated, -//this routine should be called, instead of individual update routine. 
-//This is to prevent unnecessary from scratch updates static void update_setup_slacks_and_criticalities(float crit_exponent, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, @@ -1087,22 +1031,26 @@ static void update_setup_slacks_and_criticalities(float crit_exponent, timing_info->update(); //Update placer's setup slacks - setup_slacks->update_setup_slacks(timing_info, do_recompute_setup_slacks); + if (do_update_setup_slacks) { + setup_slacks->update_setup_slacks(timing_info, do_recompute_setup_slacks); + } - //Update placer's criticalities (e.g. sharpen with crit_exponent) - criticalities->update_criticalities(timing_info, crit_exponent, do_recompute_criticalities); + if (do_update_criticalities) { + //Update placer's criticalities (e.g. sharpen with crit_exponent) + criticalities->update_criticalities(timing_info, crit_exponent, do_recompute_criticalities); - //Update connection, net and total timing costs based on new criticalities + //Update connection, net and total timing costs based on new criticalities #ifdef INCR_COMP_TD_COSTS - update_td_costs(delay_model, *criticalities, &costs->timing_cost); + update_td_costs(delay_model, *criticalities, &costs->timing_cost); #else - comp_td_costs(delay_model, *criticalities, &costs->timing_cost); + comp_td_costs(delay_model, *criticalities, &costs->timing_cost); #endif + } - //Both Setup slacks and Criticalities are now in sync with the timing_info - //They can be both incrementally updated next time - do_recompute_setup_slacks = false; - do_recompute_criticalities = false; + //Setup slacks and Criticalities need to be in sync with the timing_info + //Otherwise, they cannot be incrementally updated on the next iteration + do_recompute_setup_slacks = !do_update_setup_slacks; + do_recompute_criticalities = !do_update_criticalities; //Clear invalidation state pin_timing_invalidator->reset(); @@ -1123,6 +1071,7 @@ static void placement_inner_loop(float t, ClusteredPinTimingInvalidator* 
pin_timing_invalidator, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, MoveGenerator& move_generator, t_pl_blocks_to_be_moved& blocks_affected, SetupTimingInfo* timing_info) { @@ -1180,12 +1129,15 @@ static void placement_inner_loop(float t, * criticalities and update the timing cost since it will change. */ //Update timing information - update_criticalities(crit_exponent, - delay_model, - criticalities, - pin_timing_invalidator, - timing_info, - costs); + do_update_criticalities = true; + do_update_setup_slacks = false; + update_setup_slacks_and_criticalities(crit_exponent, + delay_model, + criticalities, + setup_slacks, + pin_timing_invalidator, + timing_info, + costs); } inner_crit_iter_count++; } From e8f73c63494374356ada911f7a947670ea1cb16a Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Fri, 31 Jul 2020 00:50:38 -0400 Subject: [PATCH 05/24] Resolve more merge conflicts --- vpr/src/place/place.cpp | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 55eff6bf35d..a3ddf6dfd3f 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -782,7 +782,6 @@ void try_place(const t_placer_opts& placer_opts, costs.cost = 1; } -<<<<<<< HEAD outer_loop_update_criticalities(placer_opts, &costs, &prev_inverse_costs, num_connections, crit_exponent, @@ -792,16 +791,6 @@ void try_place(const t_placer_opts& placer_opts, placer_setup_slacks.get(), pin_timing_invalidator.get(), timing_info.get()); -======= - outer_loop_recompute_criticalities(placer_opts, &costs, &prev_inverse_costs, - num_connections, - state.crit_exponent, - &outer_crit_iter_count, - place_delay_model.get(), - placer_criticalities.get(), - pin_timing_invalidator.get(), - timing_info.get()); ->>>>>>> sync placement_inner_loop(state.t, num_temps, state.rlim, placer_opts, state.move_lim, state.crit_exponent, inner_recompute_limit, &stats, @@ 
-850,8 +839,7 @@ void try_place(const t_placer_opts& placer_opts, auto pre_quench_timing_stats = timing_ctx.stats; { /* Quench */ vtr::ScopedFinishTimer temperature_timer("Placement Quench"); - -<<<<<<< HEAD + outer_loop_update_criticalities(placer_opts, &costs, &prev_inverse_costs, num_connections, @@ -862,17 +850,6 @@ void try_place(const t_placer_opts& placer_opts, placer_setup_slacks.get(), pin_timing_invalidator.get(), timing_info.get()); -======= - outer_loop_recompute_criticalities(placer_opts, &costs, - &prev_inverse_costs, - num_connections, - state.crit_exponent, - &outer_crit_iter_count, - place_delay_model.get(), - placer_criticalities.get(), - pin_timing_invalidator.get(), - timing_info.get()); ->>>>>>> sync state.t = 0; /* freeze out */ @@ -938,7 +915,6 @@ void try_place(const t_placer_opts& placer_opts, VTR_ASSERT(timing_info); //Update timing and costs -<<<<<<< HEAD do_update_criticalities = true; do_update_setup_slacks = false; update_setup_slacks_and_criticalities(crit_exponent, @@ -948,14 +924,6 @@ void try_place(const t_placer_opts& placer_opts, pin_timing_invalidator.get(), timing_info.get(), &costs); -======= - recompute_criticalities(state.crit_exponent, - place_delay_model.get(), - placer_criticalities.get(), - pin_timing_invalidator.get(), - timing_info.get(), - &costs); ->>>>>>> sync critical_path = timing_info->least_slack_critical_path(); From d80de58cb846a39ea5f8217ccd0af143e834f2cd Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Fri, 31 Jul 2020 01:54:22 -0400 Subject: [PATCH 06/24] Changed crit_exponent to first_crit_exponent/state.crit_exponent --- vpr/src/place/place.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index a3ddf6dfd3f..22debfa44a2 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -628,7 +628,7 @@ void try_place(const t_placer_opts& placer_opts, //Update timing and costs do_update_criticalities = true; 
do_update_setup_slacks = false; - update_setup_slacks_and_criticalities(crit_exponent, + update_setup_slacks_and_criticalities(first_crit_exponent, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), @@ -784,7 +784,7 @@ void try_place(const t_placer_opts& placer_opts, outer_loop_update_criticalities(placer_opts, &costs, &prev_inverse_costs, num_connections, - crit_exponent, + state.crit_exponent, &outer_crit_iter_count, place_delay_model.get(), placer_criticalities.get(), @@ -843,7 +843,7 @@ void try_place(const t_placer_opts& placer_opts, outer_loop_update_criticalities(placer_opts, &costs, &prev_inverse_costs, num_connections, - crit_exponent, + state.crit_exponent, &outer_crit_iter_count, place_delay_model.get(), placer_criticalities.get(), @@ -917,7 +917,7 @@ void try_place(const t_placer_opts& placer_opts, //Update timing and costs do_update_criticalities = true; do_update_setup_slacks = false; - update_setup_slacks_and_criticalities(crit_exponent, + update_setup_slacks_and_criticalities(state.crit_exponent, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), From 831df449f4931e0cd7a1202b4105fe505421d156 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 6 Aug 2020 02:18:19 -0400 Subject: [PATCH 07/24] Created a setup slack matrix that copies data from the PlacerSetupSlacks. The matrix update is incremental according to the pins with modified setup slacks returned from PlacerSetupSlacks. Outer loop routine now updates both setup slacks and criticalities, while the inner loop routine passes in variables that determine the strategies/cost functions used to evaluate the effectiveness of try_swap moves. 
--- vpr/src/place/place.cpp | 155 ++++++++++++++++++++++++++-------------- 1 file changed, 102 insertions(+), 53 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 22debfa44a2..c63599705a3 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -150,18 +150,20 @@ static vtr::vector bb_updated_before; * Net connection delays based on the placement. * Index ranges: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] */ -static ClbNetPinsMatrix connection_delay; //Delays based on commited block positions +static ClbNetPinsMatrix connection_delay; //Delays based on committed block positions static ClbNetPinsMatrix proposed_connection_delay; //Delays for proposed block positions (only // for connections effected by move, otherwise // INVALID_DELAY) +static ClbNetPinsMatrix connection_setup_slack; //Setup slacks based on most recently updated timing graph + /* * Timing cost of connections (i.e. criticality * delay). * Index ranges: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] */ -static PlacerTimingCosts connection_timing_cost; //Costs of commited block positions +static PlacerTimingCosts connection_timing_cost; //Costs of committed block positions static ClbNetPinsMatrix proposed_connection_timing_cost; //Costs for proposed block positions - // (only for connectsion effected by + // (only for connection effected by // move, otherwise INVALID_DELAY) /* @@ -386,6 +388,8 @@ static float comp_td_connection_delay(const PlaceDelayModel* delay_model, Cluste static void comp_td_connection_delays(const PlaceDelayModel* delay_model); +static void record_setup_slacks(const PlacerSetupSlacks* setup_slacks); + static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); @@ -439,17 +443,17 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr); static void free_try_swap_arrays(); -static void 
outer_loop_update_criticalities(const t_placer_opts& placer_opts, - t_placer_costs* costs, - t_placer_prev_inverse_costs* prev_inverse_costs, - int num_connections, - float crit_exponent, - int* outer_crit_iter_count, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info); +static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, + t_placer_costs* costs, + t_placer_prev_inverse_costs* prev_inverse_costs, + int num_connections, + float crit_exponent, + int* outer_crit_iter_count, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info); static void update_setup_slacks_and_criticalities(float crit_exponent, const PlaceDelayModel* delay_model, @@ -474,6 +478,8 @@ static void placement_inner_loop(float t, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, + bool inner_loop_update_crit, + bool inner_loop_update_setup_slack, MoveGenerator& move_generator, t_pl_blocks_to_be_moved& blocks_affected, SetupTimingInfo* timing_info); @@ -627,7 +633,7 @@ void try_place(const t_placer_opts& placer_opts, *timing_info->timing_graph()); //Update timing and costs do_update_criticalities = true; - do_update_setup_slacks = false; + do_update_setup_slacks = true; update_setup_slacks_and_criticalities(first_crit_exponent, place_delay_model.get(), placer_criticalities.get(), @@ -636,6 +642,9 @@ void try_place(const t_placer_opts& placer_opts, timing_info.get(), &costs); + //Initialize the setup slacks matrix + record_setup_slacks(placer_setup_slacks.get()); + timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during placement critical_path = timing_info->least_slack_critical_path(); @@ 
-782,15 +791,17 @@ void try_place(const t_placer_opts& placer_opts, costs.cost = 1; } - outer_loop_update_criticalities(placer_opts, &costs, &prev_inverse_costs, - num_connections, - state.crit_exponent, - &outer_crit_iter_count, - place_delay_model.get(), - placer_criticalities.get(), - placer_setup_slacks.get(), - pin_timing_invalidator.get(), - timing_info.get()); + outer_loop_update_timing_info(placer_opts, &costs, &prev_inverse_costs, + num_connections, + state.crit_exponent, + &outer_crit_iter_count, + place_delay_model.get(), + placer_criticalities.get(), + placer_setup_slacks.get(), + pin_timing_invalidator.get(), + timing_info.get()); + + bool anneal_update_crit = true, anneal_update_setup_slack = false; placement_inner_loop(state.t, num_temps, state.rlim, placer_opts, state.move_lim, state.crit_exponent, inner_recompute_limit, &stats, @@ -801,6 +812,8 @@ void try_place(const t_placer_opts& placer_opts, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), + anneal_update_crit, + anneal_update_setup_slack, *move_generator, blocks_affected, timing_info.get()); @@ -839,20 +852,23 @@ void try_place(const t_placer_opts& placer_opts, auto pre_quench_timing_stats = timing_ctx.stats; { /* Quench */ vtr::ScopedFinishTimer temperature_timer("Placement Quench"); - - outer_loop_update_criticalities(placer_opts, &costs, - &prev_inverse_costs, - num_connections, - state.crit_exponent, - &outer_crit_iter_count, - place_delay_model.get(), - placer_criticalities.get(), - placer_setup_slacks.get(), - pin_timing_invalidator.get(), - timing_info.get()); + + outer_loop_update_timing_info(placer_opts, &costs, + &prev_inverse_costs, + num_connections, + state.crit_exponent, + &outer_crit_iter_count, + place_delay_model.get(), + placer_criticalities.get(), + placer_setup_slacks.get(), + pin_timing_invalidator.get(), + timing_info.get()); state.t = 0; /* freeze out */ + //Analyze setup slacks for quench + bool quench_update_crit = true, 
quench_update_setup_slack = true; + /* Run inner loop again with temperature = 0 so as to accept only swaps * which reduce the cost of the placement */ placement_inner_loop(state.t, num_temps, state.rlim, placer_opts, @@ -864,6 +880,8 @@ void try_place(const t_placer_opts& placer_opts, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), + quench_update_crit, + quench_update_setup_slack, *move_generator, blocks_affected, timing_info.get()); @@ -976,18 +994,18 @@ void try_place(const t_placer_opts& placer_opts, VTR_LOG("update_td_costs: connections %g nets %g sum_nets %g total %g\n", f_update_td_costs_connections_elapsed_sec, f_update_td_costs_nets_elapsed_sec, f_update_td_costs_sum_nets_elapsed_sec, f_update_td_costs_total_elapsed_sec); } -/* Function to update the criticalities before the inner loop of the annealing */ -static void outer_loop_update_criticalities(const t_placer_opts& placer_opts, - t_placer_costs* costs, - t_placer_prev_inverse_costs* prev_inverse_costs, - int num_connections, - float crit_exponent, - int* outer_crit_iter_count, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info) { +/* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */ +static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, + t_placer_costs* costs, + t_placer_prev_inverse_costs* prev_inverse_costs, + int num_connections, + float crit_exponent, + int* outer_crit_iter_count, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info) { if (placer_opts.place_algorithm != PATH_TIMING_DRIVEN_PLACE) return; @@ -1003,7 +1021,7 @@ static void outer_loop_update_criticalities(const t_placer_opts& placer_opts, 
//Update timing information and criticalities do_update_criticalities = true; - do_update_setup_slacks = false; + do_update_setup_slacks = true; update_setup_slacks_and_criticalities(crit_exponent, delay_model, criticalities, @@ -1011,6 +1029,8 @@ static void outer_loop_update_criticalities(const t_placer_opts& placer_opts, pin_timing_invalidator, timing_info, costs); + //Always record the setup slacks + record_setup_slacks(setup_slacks); *outer_crit_iter_count = 0; } @@ -1036,8 +1056,8 @@ static void update_setup_slacks_and_criticalities(float crit_exponent, //Run STA to update slacks and adjusted/relaxed criticalities timing_info->update(); - //Update placer's setup slacks if (do_update_setup_slacks) { + //Update placer's setup slacks setup_slacks->update_setup_slacks(timing_info, do_recompute_setup_slacks); } @@ -1053,7 +1073,7 @@ static void update_setup_slacks_and_criticalities(float crit_exponent, #endif } - //Setup slacks and Criticalities need to be in sync with the timing_info + //Setup slacks and criticalities need to be in sync with the timing_info //Otherwise, they cannot be incrementally updated on the next iteration do_recompute_setup_slacks = !do_update_setup_slacks; do_recompute_criticalities = !do_update_criticalities; @@ -1078,6 +1098,8 @@ static void placement_inner_loop(float t, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, + bool inner_loop_update_crit, + bool inner_loop_update_setup_slack, MoveGenerator& move_generator, t_pl_blocks_to_be_moved& blocks_affected, SetupTimingInfo* timing_info) { @@ -1135,8 +1157,8 @@ static void placement_inner_loop(float t, * criticalities and update the timing cost since it will change. 
*/ //Update timing information - do_update_criticalities = true; - do_update_setup_slacks = false; + do_update_criticalities = inner_loop_update_crit; + do_update_setup_slacks = inner_loop_update_setup_slack; update_setup_slacks_and_criticalities(crit_exponent, delay_model, criticalities, @@ -1144,6 +1166,17 @@ static void placement_inner_loop(float t, pin_timing_invalidator, timing_info, costs); + + //Currently, if we update the setup slacks within the inner loop + //We aim to evaluate moves based upon the cost functions + //related to these setup slacks + bool do_setup_slack_analysis = inner_loop_update_setup_slack; + if (do_setup_slack_analysis) { + //Currently, we accept these new setup slacks right away + //TODO: Consider situations where we reject the series of moves + //that lead to the current slack values. + record_setup_slacks(setup_slacks); + } } inner_crit_iter_count++; } @@ -1814,7 +1847,7 @@ static float comp_td_connection_delay(const PlaceDelayModel* delay_model, Cluste //Recompute all point to point delays, updating connection_delay static void comp_td_connection_delays(const PlaceDelayModel* delay_model) { - auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); for (auto net_id : cluster_ctx.clb_nlist.nets()) { for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ++ipin) { @@ -1823,6 +1856,19 @@ static void comp_td_connection_delays(const PlaceDelayModel* delay_model) { } } +//Copy all the current setup slacks from the PlacerSetupSlacks class +static void record_setup_slacks(const PlacerSetupSlacks* setup_slacks) { + const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + + //Only go through pins with modified setup slack + for (ClusterPinId pin_id : setup_slacks->pins_with_modified_setup_slack()) { + ClusterNetId net_id = clb_nlist.pin_net(pin_id); + size_t pin_index_in_net = clb_nlist.pin_net_index(pin_id); + + connection_setup_slack[net_id][pin_index_in_net] = 
setup_slacks->setup_slack(net_id, pin_index_in_net); + } +} + /* Update the connection_timing_cost values from the temporary * * values for all connections that have changed. */ static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { @@ -2144,6 +2190,8 @@ static void alloc_and_load_placement_structs(float place_cost_exp, connection_delay = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.f); proposed_connection_delay = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.f); + connection_setup_slack = make_net_pins_matrix(cluster_ctx.clb_nlist, std::numeric_limits::infinity()); + connection_timing_cost = PlacerTimingCosts(cluster_ctx.clb_nlist); proposed_connection_timing_cost = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.); net_timing_cost.resize(num_nets, 0.); @@ -2185,6 +2233,7 @@ static void free_placement_structs(const t_placer_opts& placer_opts) { if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { vtr::release_memory(connection_timing_cost); vtr::release_memory(connection_delay); + vtr::release_memory(connection_setup_slack); vtr::release_memory(proposed_connection_timing_cost); vtr::release_memory(proposed_connection_delay); From d329911beed081646bc13ea7acb0f5a23fdac330 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 6 Aug 2020 03:45:27 -0400 Subject: [PATCH 08/24] Provided more complete explanation for the record_setup_slacks routine. 
--- vpr/src/place/place.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index c63599705a3..5f83493b082 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1029,7 +1029,7 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, pin_timing_invalidator, timing_info, costs); - //Always record the setup slacks + //Always record the setup slacks when they are updated record_setup_slacks(setup_slacks); *outer_crit_iter_count = 0; @@ -1167,14 +1167,15 @@ static void placement_inner_loop(float t, timing_info, costs); - //Currently, if we update the setup slacks within the inner loop - //We aim to evaluate moves based upon the cost functions + //Currently, if we update the setup slacks within the inner loop, + //we do so to evaluate moves based upon the cost functions //related to these setup slacks - bool do_setup_slack_analysis = inner_loop_update_setup_slack; - if (do_setup_slack_analysis) { - //Currently, we accept these new setup slacks right away - //TODO: Consider situations where we reject the series of moves - //that lead to the current slack values. + // + //If we do not update the setup slacks, we do not alter the values + //in the setup slacks matrix. Otherwise, the incremental update + //method of the routine record_setup_slacks will become dysfunctional. 
+ if (inner_loop_update_setup_slack) { + //TODO: add slack cost evaluation functions record_setup_slacks(setup_slacks); } } @@ -1857,6 +1858,8 @@ static void comp_td_connection_delays(const PlaceDelayModel* delay_model) { } //Copy all the current setup slacks from the PlacerSetupSlacks class +//This routine will always be incremental and correct, as it is called +//if and only if the PlacerSetupSlacks class is updated with new slack values static void record_setup_slacks(const PlacerSetupSlacks* setup_slacks) { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; From 225870e19d9fe2ef12462fa0a8927353693d008c Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 6 Aug 2020 06:01:29 -0400 Subject: [PATCH 09/24] Added placement snapshot functions that facilitate the reversion of a series of successful moves done by try_swap. Right now the data structures representing the state variables are directly being copied; however, the process can possibly be optimized with incremental techniques. The snapshot routines are called in the placement's inner loop, and should be used together with VPR options quench_recompute_divider and less optimally inner_recompute_divider. The latter would be too time-consuming in practice.
--- vpr/src/place/place.cpp | 136 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 3 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 5f83493b082..d8076ca083e 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -390,6 +390,8 @@ static void comp_td_connection_delays(const PlaceDelayModel* delay_model); static void record_setup_slacks(const PlacerSetupSlacks* setup_slacks); +static bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks); + static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); @@ -514,6 +516,20 @@ static void print_resources_utilization(); static void init_annealing_state(t_annealing_state* state, const t_annealing_sched& annealing_sched, float t, float rlim, int move_lim_max, float crit_exponent); +//Placement snapshot data structures. To be optimized. +static ClbNetPinsMatrix connection_delay_snapshot; +static PlacerTimingCosts connection_timing_cost_snapshot; +static vtr::vector bb_coords_snapshot, bb_num_on_edges_snapshot; +static vtr::vector net_cost_snapshot; +static vtr::vector bb_updated_before_snapshot; +static vtr::vector_map block_locs_snapshot; +static vtr::Matrix grid_blocks_snapshot; + +static void take_placement_snapshot(); +static void revert_placement_snapshot(ClusteredPinTimingInvalidator* pin_tedges_invalidator, TimingInfo* timing_info); + +static e_move_result do_setup_slack_cost_analysis(const PlacerSetupSlacks* setup_slacks); + /*****************************************************************************/ void try_place(const t_placer_opts& placer_opts, t_annealing_sched annealing_sched, @@ -849,6 +865,11 @@ void try_place(const t_placer_opts& placer_opts, } while (update_annealing_state(&state, success_rat, costs, placer_opts, annealing_sched)); /* Outer loop of the simmulated annealing ends */ + if (placer_opts.place_algorithm == 
PATH_TIMING_DRIVEN_PLACE) { + //Take a snapshot of the current placer before doing placement quench + take_placement_snapshot(); + } + auto pre_quench_timing_stats = timing_ctx.stats; { /* Quench */ vtr::ScopedFinishTimer temperature_timer("Placement Quench"); @@ -1175,8 +1196,39 @@ static void placement_inner_loop(float t, //in the setup slacks matrix. Otherwise, the incremental update //method of the routine record_setup_slacks will become dysfunctional. if (inner_loop_update_setup_slack) { - //TODO: add slack cost evaluation functions - record_setup_slacks(setup_slacks); + e_move_result slack_result = do_setup_slack_cost_analysis(setup_slacks); + + if (slack_result == ACCEPTED) { + //If accepted, update the setup slack matrix + //and take a snapshot of the current placement + record_setup_slacks(setup_slacks); + take_placement_snapshot(); + } else { + VTR_ASSERT(slack_result == REJECTED); + + //If rejected, undo all the moves since the last timing info update + //i.e., revert to the last placement snapshot + // + //Invalidate all the timing edges and do a new timing_info->update() + // + //Leave the setup slack matrix unchanged + revert_placement_snapshot(pin_timing_invalidator, timing_info); + + //Update timing information + do_update_criticalities = true; + do_update_setup_slacks = true; + update_setup_slacks_and_criticalities(crit_exponent, + delay_model, + criticalities, + setup_slacks, + pin_timing_invalidator, + timing_info, + costs); + + VTR_ASSERT_MSG( + verify_connection_setup_slacks(setup_slacks), + "The setup slacks should not change after reverting to the last placement snapshot and updating the timing info."); + } } } inner_crit_iter_count++; @@ -1212,6 +1264,13 @@ static void placement_inner_loop(float t, /* Inner loop ends */ } +//Evaluate if the new slack values are acceptable using weighted average cost functions +static e_move_result do_setup_slack_cost_analysis(const PlacerSetupSlacks* setup_slacks) { + //TODO: implement the cost functions + 
int num = rand() % 2; + return num ? ACCEPTED : REJECTED; +} + static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, @@ -1863,7 +1922,7 @@ static void comp_td_connection_delays(const PlaceDelayModel* delay_model) { static void record_setup_slacks(const PlacerSetupSlacks* setup_slacks) { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - //Only go through pins with modified setup slack + //Only go through sink pins with modified setup slack for (ClusterPinId pin_id : setup_slacks->pins_with_modified_setup_slack()) { ClusterNetId net_id = clb_nlist.pin_net(pin_id); size_t pin_index_in_net = clb_nlist.pin_net_index(pin_id); @@ -1872,6 +1931,21 @@ static void record_setup_slacks(const PlacerSetupSlacks* setup_slacks) { } } +static bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks) { + const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + + //Go through every single sink pin + for (ClusterNetId net_id : clb_nlist.nets()) { + for (size_t ipin = 1; ipin < clb_nlist.net_pins(net_id).size(); ++ipin) { + if (connection_setup_slack[net_id][ipin] != setup_slacks->setup_slack(net_id, ipin)) { + return false; + } + } + } + + return true; +} + /* Update the connection_timing_cost values from the temporary * * values for all connections that have changed. 
*/ static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { @@ -3104,3 +3178,59 @@ static void init_annealing_state(t_annealing_state* state, bool placer_needs_lookahead(const t_vpr_setup& vpr_setup) { return (vpr_setup.PlacerOpts.place_algorithm == PATH_TIMING_DRIVEN_PLACE); } + +//Recording down all the info about the placer's current state +static void take_placement_snapshot() { + const auto& place_ctx = g_vpr_ctx.placement(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + const auto& clb_nlist = cluster_ctx.clb_nlist; + + connection_delay_snapshot = connection_delay; + //Go through every single sink pin to check if delay has been updated + for (ClusterNetId net_id : clb_nlist.nets()) { + for (size_t ipin = 1; ipin < clb_nlist.net_pins(net_id).size(); ++ipin) { + VTR_ASSERT_MSG(connection_delay[net_id][ipin] == connection_delay_snapshot[net_id][ipin], + "Direct assignment of the delay has failed"); + } + } + + connection_timing_cost_snapshot = connection_timing_cost; + bb_coords_snapshot = bb_coords; + bb_num_on_edges_snapshot = bb_num_on_edges; + net_cost_snapshot = net_cost; + bb_updated_before_snapshot = bb_updated_before; + + block_locs_snapshot = place_ctx.block_locs; + grid_blocks_snapshot = place_ctx.grid_blocks; +} + +//Revert back to the recorded placer state, which is the state +//of the placer when the last timing info update took place +static void revert_placement_snapshot(ClusteredPinTimingInvalidator* pin_tedges_invalidator, TimingInfo* timing_info) { + auto& place_ctx = g_vpr_ctx.mutable_placement(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + const auto& clb_nlist = cluster_ctx.clb_nlist; + + //Go through every single sink pin to check if delay has changed + for (ClusterNetId net_id : clb_nlist.nets()) { + for (size_t ipin = 1; ipin < clb_nlist.net_pins(net_id).size(); ++ipin) { + if (connection_delay[net_id][ipin] != connection_delay_snapshot[net_id][ipin]) { + //Delay changed, must invalidate + 
ClusterPinId pin_id = clb_nlist.net_pin(net_id, ipin); + pin_tedges_invalidator->invalidate_connection(pin_id, timing_info); + connection_delay[net_id][ipin] = connection_delay_snapshot[net_id][ipin]; + } + } + } + + connection_timing_cost = connection_timing_cost_snapshot; + bb_coords = bb_coords_snapshot; + bb_num_on_edges = bb_num_on_edges_snapshot; + net_cost = net_cost_snapshot; + bb_updated_before = bb_updated_before_snapshot; + + place_ctx.block_locs = block_locs_snapshot; + place_ctx.grid_blocks = grid_blocks_snapshot; +} \ No newline at end of file From 0e01ed7e68b2c0589771a918d2e54fbc0a28e9a5 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 6 Aug 2020 07:33:09 -0400 Subject: [PATCH 10/24] Implemented do_setup_slack_cost_analysis: softmax of negative slacks --- vpr/src/place/place.cpp | 63 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 3 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index d8076ca083e..1972c79cc80 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1266,9 +1266,66 @@ static void placement_inner_loop(float t, //Evaluate if the new slack values are acceptable using weighted average cost functions static e_move_result do_setup_slack_cost_analysis(const PlacerSetupSlacks* setup_slacks) { - //TODO: implement the cost functions - int num = rand() % 2; - return num ? ACCEPTED : REJECTED; + const auto& cluster_ctx = g_vpr_ctx.clustering(); + + const auto& clb_nlist = cluster_ctx.clb_nlist; + + //Aggregating the total negative slack. 
Skip pins with positive slacks + float total_negative_slack = 0.f; + std::vector pins_with_negative_slack; + size_t num_pins_with_negative_slack; + + for (ClusterPinId pin_id : setup_slacks->pins_with_modified_setup_slack()) { + ClusterNetId net_id = clb_nlist.pin_net(pin_id); + size_t pin_index_in_net = clb_nlist.pin_net_index(pin_id); + + if (connection_setup_slack[net_id][pin_index_in_net] < 0) { + pins_with_negative_slack.push_back(pin_id); + ++num_pins_with_negative_slack; + total_negative_slack += connection_setup_slack[net_id][pin_index_in_net]; + } + } + + //Variables for storing weights and values + float weight, frac_changed; + float total_cost = 0.f; + + std::ofstream osa("a.out", std::ofstream::app); + std::ofstream osb("b.out", std::ofstream::app); + + for (ClusterPinId pin_id : pins_with_negative_slack) { + ClusterNetId net_id = clb_nlist.pin_net(pin_id); + size_t pin_index_in_net = clb_nlist.pin_net_index(pin_id); + + //The slack values in PlacerSetupSlacks have not been updated to connection_setup_slack + //These values are in the proposed state: they might be accepted or rejected + float proposed_setup_slack = setup_slacks->setup_slack(net_id, pin_index_in_net); + float original_setup_slack = connection_setup_slack[net_id][pin_index_in_net]; + + //The worse the slack of a pin, the more weight it is given + //Currently, first normalize, then apply the Softmax function, + //which takes the exponential of the opposite value of the + //normalized slack value and then normalize again. More negative + //slacks should take on a much larger weight. + weight = std::exp(original_setup_slack / total_negative_slack); + osa << weight << ' '; + + //The fraction by which the slack value has changed. + //Positive->good, negative->bad. 
+ frac_changed = (proposed_setup_slack - original_setup_slack) / original_setup_slack; + osb << frac_changed << ' '; + + //Using minus due to the definition of cost: lower cost is better + total_cost -= frac_changed * weight; + } + osa << '\n'; + osb << '\n'; + osa.close(); + osb.close(); + + //Currently, as long as the total cost is negative, the moves + //by the try_swap routine are accepted. + return total_cost < 0 ? ACCEPTED : REJECTED; } static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, From 2e212dcbe5be359f389f28784a30f295d8301915 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Tue, 11 Aug 2020 06:11:26 -0400 Subject: [PATCH 11/24] Added single move reversion for setup slack analysis (rather than taking placement snapshots). Currently experiencing consistency failures. Also updated slack analysis cost function: comparing the worst slack change across all modified clb pins --- vpr/src/base/vpr_types.h | 3 +- vpr/src/place/place.cpp | 403 ++++++++++++++++++--------------------- 2 files changed, 185 insertions(+), 221 deletions(-) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 1e40ccfaef2..34f08d250f6 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -851,7 +851,8 @@ struct t_annealing_sched { * doPlacement: true if placement is supposed to be done in the CAD flow, false otherwise */ enum e_place_algorithm { BOUNDING_BOX_PLACE, - PATH_TIMING_DRIVEN_PLACE + PATH_TIMING_DRIVEN_PLACE, + SETUP_SLACK_ANALYSIS_PLACE }; enum e_place_effort_scaling { diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 1972c79cc80..258c23875aa 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -331,15 +331,17 @@ static void update_move_nets(int num_nets_affected); static void reset_move_nets(int num_nets_affected); static e_move_result try_swap(float t, + float crit_exponent, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, float rlim, MoveGenerator&
move_generator, - TimingInfo* timing_info, + SetupTimingInfo* timing_info, ClusteredPinTimingInvalidator* pin_timing_invalidator, t_pl_blocks_to_be_moved& blocks_affected, const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, float rlim_escape_fraction, enum e_place_algorithm place_algorithm, float timing_tradeoff); @@ -357,14 +359,16 @@ static int check_placement_consistency(); static int check_block_placement_consistency(); static int check_macro_placement_consistency(); -static float starting_t(t_placer_costs* costs, +static float starting_t(float crit_exponent, + t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, t_annealing_sched annealing_sched, int max_moves, float rlim, const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - TimingInfo* timing_info, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + SetupTimingInfo* timing_info, MoveGenerator& move_generator, ClusteredPinTimingInvalidator* pin_timing_invalidator, t_pl_blocks_to_be_moved& blocks_affected, @@ -388,7 +392,7 @@ static float comp_td_connection_delay(const PlaceDelayModel* delay_model, Cluste static void comp_td_connection_delays(const PlaceDelayModel* delay_model); -static void record_setup_slacks(const PlacerSetupSlacks* setup_slacks); +static void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks); static bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks); @@ -396,7 +400,7 @@ static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); -static void invalidate_affected_connection_delays(const t_pl_blocks_to_be_moved& blocks_affected, +static void invalidate_affected_connection_delays(const std::vector& sink_pins_affected, ClusteredPinTimingInvalidator* pin_tedges_invalidator, TimingInfo* timing_info); @@ 
-410,6 +414,9 @@ static double comp_td_connection_cost(const PlaceDelayModel* delay_mode, const P static double sum_td_net_cost(ClusterNetId net); static double sum_td_costs(); +static void find_affected_sink_pins(const t_pl_blocks_to_be_moved& blocks_affected, + std::vector& sink_pins_affected); + static e_move_result assess_swap(double delta_c, double t); static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new); @@ -484,7 +491,8 @@ static void placement_inner_loop(float t, bool inner_loop_update_setup_slack, MoveGenerator& move_generator, t_pl_blocks_to_be_moved& blocks_affected, - SetupTimingInfo* timing_info); + SetupTimingInfo* timing_info, + enum e_place_algorithm place_algorithm); static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const PlaceDelayModel* delay_model, @@ -516,19 +524,7 @@ static void print_resources_utilization(); static void init_annealing_state(t_annealing_state* state, const t_annealing_sched& annealing_sched, float t, float rlim, int move_lim_max, float crit_exponent); -//Placement snapshot data structures. To be optimized. 
-static ClbNetPinsMatrix connection_delay_snapshot; -static PlacerTimingCosts connection_timing_cost_snapshot; -static vtr::vector bb_coords_snapshot, bb_num_on_edges_snapshot; -static vtr::vector net_cost_snapshot; -static vtr::vector bb_updated_before_snapshot; -static vtr::vector_map block_locs_snapshot; -static vtr::Matrix grid_blocks_snapshot; - -static void take_placement_snapshot(); -static void revert_placement_snapshot(ClusteredPinTimingInvalidator* pin_tedges_invalidator, TimingInfo* timing_info); - -static e_move_result do_setup_slack_cost_analysis(const PlacerSetupSlacks* setup_slacks); +static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks); /*****************************************************************************/ void try_place(const t_placer_opts& placer_opts, @@ -647,9 +643,11 @@ void try_place(const t_placer_opts& placer_opts, atom_ctx.nlist, atom_ctx.lookup, *timing_info->timing_graph()); - //Update timing and costs + //First time compute timing and costs, compute from scratch do_update_criticalities = true; do_update_setup_slacks = true; + do_recompute_criticalities = true; + do_recompute_setup_slacks = true; update_setup_slacks_and_criticalities(first_crit_exponent, place_delay_model.get(), placer_criticalities.get(), @@ -659,7 +657,7 @@ void try_place(const t_placer_opts& placer_opts, &costs); //Initialize the setup slacks matrix - record_setup_slacks(placer_setup_slacks.get()); + commit_setup_slacks(placer_setup_slacks.get()); timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during placement @@ -774,10 +772,11 @@ void try_place(const t_placer_opts& placer_opts, first_rlim = (float)max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); - float first_t = starting_t(&costs, &prev_inverse_costs, + float first_t = starting_t(first_crit_exponent, &costs, &prev_inverse_costs, annealing_sched, move_lim, first_rlim, place_delay_model.get(), placer_criticalities.get(), + 
placer_setup_slacks.get(), timing_info.get(), *move_generator, pin_timing_invalidator.get(), @@ -832,7 +831,8 @@ void try_place(const t_placer_opts& placer_opts, anneal_update_setup_slack, *move_generator, blocks_affected, - timing_info.get()); + timing_info.get(), + placer_opts.place_algorithm); tot_iter += state.move_lim; @@ -865,11 +865,6 @@ void try_place(const t_placer_opts& placer_opts, } while (update_annealing_state(&state, success_rat, costs, placer_opts, annealing_sched)); /* Outer loop of the simmulated annealing ends */ - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { - //Take a snapshot of the current placer before doing placement quench - take_placement_snapshot(); - } - auto pre_quench_timing_stats = timing_ctx.stats; { /* Quench */ vtr::ScopedFinishTimer temperature_timer("Placement Quench"); @@ -905,7 +900,8 @@ void try_place(const t_placer_opts& placer_opts, quench_update_setup_slack, *move_generator, blocks_affected, - timing_info.get()); + timing_info.get(), + e_place_algorithm::SETUP_SLACK_ANALYSIS_PLACE); tot_iter += move_lim; ++num_temps; @@ -1050,8 +1046,9 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, pin_timing_invalidator, timing_info, costs); - //Always record the setup slacks when they are updated - record_setup_slacks(setup_slacks); + + //Always commit the setup slacks when they are updated + commit_setup_slacks(setup_slacks); *outer_crit_iter_count = 0; } @@ -1123,7 +1120,8 @@ static void placement_inner_loop(float t, bool inner_loop_update_setup_slack, MoveGenerator& move_generator, t_pl_blocks_to_be_moved& blocks_affected, - SetupTimingInfo* timing_info) { + SetupTimingInfo* timing_info, + enum e_place_algorithm place_algorithm) { int inner_crit_iter_count, inner_iter; int inner_placement_save_count = 0; //How many times have we dumped placement to a file this temperature? 
@@ -1138,15 +1136,16 @@ static void placement_inner_loop(float t, /* Inner loop begins */ for (inner_iter = 0; inner_iter < move_lim; inner_iter++) { - e_move_result swap_result = try_swap(t, costs, prev_inverse_costs, rlim, + e_move_result swap_result = try_swap(t, crit_exponent, costs, prev_inverse_costs, rlim, move_generator, timing_info, pin_timing_invalidator, blocks_affected, delay_model, criticalities, + setup_slacks, placer_opts.rlim_escape_fraction, - placer_opts.place_algorithm, + place_algorithm, placer_opts.timing_tradeoff); if (swap_result == ACCEPTED) { @@ -1188,47 +1187,9 @@ static void placement_inner_loop(float t, timing_info, costs); - //Currently, if we update the setup slacks within the inner loop, - //we do so to evaluate moves based upon the cost functions - //related to these setup slacks - // - //If we do not update the setup slacks, we do not alter the values - //in the setup slacks matrix. Otherwise, the incremental update - //method of the routine record_setup_slacks will become dysfunctional. 
- if (inner_loop_update_setup_slack) { - e_move_result slack_result = do_setup_slack_cost_analysis(setup_slacks); - - if (slack_result == ACCEPTED) { - //If accepted, update the setup slack matrix - //and take a snapshot of the current placement - record_setup_slacks(setup_slacks); - take_placement_snapshot(); - } else { - VTR_ASSERT(slack_result == REJECTED); - - //If rejected, undo all the moves since the last timing info update - //i.e., revert to the last placement snapshot - // - //Invalidate all the timing edges and do a new timing_info->update() - // - //Leave the setup slack matrix unchanged - revert_placement_snapshot(pin_timing_invalidator, timing_info); - - //Update timing information - do_update_criticalities = true; - do_update_setup_slacks = true; - update_setup_slacks_and_criticalities(crit_exponent, - delay_model, - criticalities, - setup_slacks, - pin_timing_invalidator, - timing_info, - costs); - - VTR_ASSERT_MSG( - verify_connection_setup_slacks(setup_slacks), - "The setup slacks should not change after reverting to the last placement snapshot and updating the timing info."); - } + //Commit the setup slacks if they are updated + if (do_update_setup_slacks) { + commit_setup_slacks(setup_slacks); } } inner_crit_iter_count++; @@ -1264,68 +1225,33 @@ static void placement_inner_loop(float t, /* Inner loop ends */ } -//Evaluate if the new slack values are acceptable using weighted average cost functions -static e_move_result do_setup_slack_cost_analysis(const PlacerSetupSlacks* setup_slacks) { +static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks) { const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& clb_nlist = cluster_ctx.clb_nlist; - //Aggregating the total negative slack. 
Skip pins with positive slacks - float total_negative_slack = 0.f; - std::vector pins_with_negative_slack; - size_t num_pins_with_negative_slack; + //Find the original/proposed setup slacks of pins with modified values + std::vector original_setup_slacks, proposed_setup_slacks; - for (ClusterPinId pin_id : setup_slacks->pins_with_modified_setup_slack()) { - ClusterNetId net_id = clb_nlist.pin_net(pin_id); - size_t pin_index_in_net = clb_nlist.pin_net_index(pin_id); + for (ClusterPinId clb_pin : setup_slacks->pins_with_modified_setup_slack()) { + ClusterNetId net_id = clb_nlist.pin_net(clb_pin); + size_t ipin = clb_nlist.pin_net_index(clb_pin); - if (connection_setup_slack[net_id][pin_index_in_net] < 0) { - pins_with_negative_slack.push_back(pin_id); - ++num_pins_with_negative_slack; - total_negative_slack += connection_setup_slack[net_id][pin_index_in_net]; - } + original_setup_slacks.push_back(connection_setup_slack[net_id][ipin]); + proposed_setup_slacks.push_back(setup_slacks->setup_slack(net_id, ipin)); } - //Variables for storing weights and values - float weight, frac_changed; - float total_cost = 0.f; - - std::ofstream osa("a.out", std::ofstream::app); - std::ofstream osb("b.out", std::ofstream::app); + //If there are no pins with modified slack values, accept this move + //by returning an arbitrary negative number + if (original_setup_slacks.empty()) { + return -1; + } - for (ClusterPinId pin_id : pins_with_negative_slack) { - ClusterNetId net_id = clb_nlist.pin_net(pin_id); - size_t pin_index_in_net = clb_nlist.pin_net_index(pin_id); + //Sort in ascending order, from worse slack value to best + std::sort(original_setup_slacks.begin(), original_setup_slacks.end()); + std::sort(proposed_setup_slacks.begin(), proposed_setup_slacks.end()); - //The slack values in PlacerSetupSlacks have not been updated to connection_setup_slack - //These values are in the proposed state: they might be accepted or rejected - float proposed_setup_slack = 
setup_slacks->setup_slack(net_id, pin_index_in_net); - float original_setup_slack = connection_setup_slack[net_id][pin_index_in_net]; - - //The worse the slack of a pin, the more weight it is given - //Currently, first normalize, then apply the Softmax function, - //which takes the exponential of the opposite value of the - //normalized slack value and then normalize again. More negative - //slacks should take on a much larger weight. - weight = std::exp(original_setup_slack / total_negative_slack); - osa << weight << ' '; - - //The fraction by which the slack value has changed. - //Positive->good, negative->bad. - frac_changed = (proposed_setup_slack - original_setup_slack) / original_setup_slack; - osb << frac_changed << ' '; - - //Using minus due to the definition of cost: lower cost is better - total_cost -= frac_changed * weight; - } - osa << '\n'; - osb << '\n'; - osa.close(); - osb.close(); - - //Currently, as long as the total cost is negative, the moves - //by the try_swap routine are accepted. - return total_cost < 0 ? 
ACCEPTED : REJECTED; + //Compare if the worse slack value has gotten worse or better + return original_setup_slacks.front() - proposed_setup_slacks.front(); } static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, @@ -1470,14 +1396,16 @@ static bool update_annealing_state(t_annealing_state* state, return true; } -static float starting_t(t_placer_costs* costs, +static float starting_t(float crit_exponent, + t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, t_annealing_sched annealing_sched, int max_moves, float rlim, const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, - TimingInfo* timing_info, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + SetupTimingInfo* timing_info, MoveGenerator& move_generator, ClusteredPinTimingInvalidator* pin_timing_invalidator, t_pl_blocks_to_be_moved& blocks_affected, @@ -1501,13 +1429,19 @@ static float starting_t(t_placer_costs* costs, /* Try one move per block. Set t high so essentially all accepted. 
*/ for (i = 0; i < move_lim; i++) { - e_move_result swap_result = try_swap(HUGE_POSITIVE_FLOAT, costs, prev_inverse_costs, rlim, + //Will not deploy setup slack analysis, so omit crit_exponenet and setup_slack + e_move_result swap_result = try_swap(HUGE_POSITIVE_FLOAT, + crit_exponent, + costs, + prev_inverse_costs, + rlim, move_generator, timing_info, pin_timing_invalidator, blocks_affected, delay_model, criticalities, + setup_slacks, placer_opts.rlim_escape_fraction, placer_opts.place_algorithm, placer_opts.timing_tradeoff); @@ -1572,15 +1506,17 @@ static void reset_move_nets(int num_nets_affected) { } static e_move_result try_swap(float t, + float crit_exponent, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, float rlim, MoveGenerator& move_generator, - TimingInfo* timing_info, + SetupTimingInfo* timing_info, ClusteredPinTimingInvalidator* pin_timing_invalidator, t_pl_blocks_to_be_moved& blocks_affected, const PlaceDelayModel* delay_model, - const PlacerCriticalities* criticalities, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, float rlim_escape_fraction, enum e_place_algorithm place_algorithm, float timing_tradeoff) { @@ -1643,14 +1579,44 @@ static e_move_result try_swap(float t, //Update the block positions apply_move_blocks(blocks_affected); - // Find all the nets affected by this swap and update their costs + //Find all the nets affected by this swap and update their costs + //This routine calculates new connection delays and timing costs + //and store them in proposed_* data structures int num_nets_affected = find_affected_nets_and_update_costs(place_algorithm, delay_model, criticalities, blocks_affected, bb_delta_c, timing_delta_c); - if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + + //Find all the sink pins with changed connection delays from the affected blocks + //These sink pins will be passed into the pin_timing_invalidator for sta update + std::vector sink_pins_affected; + 
find_affected_sink_pins(blocks_affected, sink_pins_affected); + + if (place_algorithm == SETUP_SLACK_ANALYSIS_PLACE) { + //Invalidates timing of modified connections for incremental timing updates + //This routine relies on comparing proposed_connection_delay and connection_delay + invalidate_affected_connection_delays(sink_pins_affected, + pin_timing_invalidator, + timing_info); + + //Update timing information. Only update setup slacks. + //Keep the connection criticalities and timing costs stale + //so as not to mess up the original timing driven algorithm + do_update_criticalities = false; + do_update_setup_slacks = true; + update_setup_slacks_and_criticalities(crit_exponent, + delay_model, + criticalities, + setup_slacks, + pin_timing_invalidator, + timing_info, + costs); + + delta_c = analyze_setup_slack_cost(setup_slacks); + + } else if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { /*in this case we redefine delta_c as a combination of timing and bb. * *additionally, we normalize all values, therefore delta_c is in * *relation to 1*/ @@ -1668,18 +1634,32 @@ static e_move_result try_swap(float t, costs->cost += delta_c; costs->bb_cost += bb_delta_c; + if (place_algorithm == SETUP_SLACK_ANALYSIS_PLACE) { + costs->timing_cost += timing_delta_c; + + //Commit the setup slack information + commit_setup_slacks(setup_slacks); + + //Update the connection_timing_cost and connection_delay + //values from the temporary values. + //The connections have already been invalidated and updated + //during the previous analysis stage. 
+ commit_td_cost(blocks_affected); + } + if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { costs->timing_cost += timing_delta_c; //Invalidates timing of modified connections for incremental timing updates - //Must be called before commit_td_cost since it relies on comparing - //proposed_connection_delay and connection_delay - invalidate_affected_connection_delays(blocks_affected, + //This routine relies on comparing proposed_connection_delay and connection_delay + //If the setup slack analysis was not performed, the + //sink pins are yet to be invalidated. + invalidate_affected_connection_delays(sink_pins_affected, pin_timing_invalidator, timing_info); - /*update the connection_timing_cost and connection_delay - * values from the temporary values */ + //update the connection_timing_cost and connection_delay + //values from the temporary values commit_td_cost(blocks_affected); } @@ -1696,6 +1676,30 @@ static e_move_result try_swap(float t, /* Restore the place_ctx.block_locs data structures to their state before the move. */ revert_move_blocks(blocks_affected); + if (place_algorithm == SETUP_SLACK_ANALYSIS_PLACE) { + //Re-invalidate the affected sink pins + invalidate_affected_connection_delays(sink_pins_affected, + pin_timing_invalidator, + timing_info); + + /* Blocks are restored. 
Now Restore the timing information to pre-analysis state */ + do_update_criticalities = false; + do_update_setup_slacks = true; + update_setup_slacks_and_criticalities(crit_exponent, + delay_model, + criticalities, + setup_slacks, + pin_timing_invalidator, + timing_info, + costs); + /* + * VTR_ASSERT_MSG( + * verify_connection_setup_slacks(setup_slacks), + * "The setup slacks should not change after reverting to state before the timing info update."); + */ + revert_td_cost(blocks_affected); + } + if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { revert_td_cost(blocks_affected); } @@ -1872,6 +1876,29 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, } } +static void find_affected_sink_pins(const t_pl_blocks_to_be_moved& blocks_affected, + std::vector& sink_pins_affected) { + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& clb_nlist = cluster_ctx.clb_nlist; + + for (ClusterPinId clb_pin : blocks_affected.affected_pins) { + //It is possible that some connections may not have changed delay.(e.g. + //For instance, if using a dx/dy delay model, this could occur if a sink + //moved to a new position with the same dx/dy from it's driver. + // + //To minimize work during the incremental STA update we do not invalidate + //such unchanged connections. + + ClusterNetId net = clb_nlist.pin_net(clb_pin); + int ipin = clb_nlist.pin_net_index(clb_pin); + + if (proposed_connection_delay[net][ipin] != connection_delay[net][ipin]) { + //Delay has changed. Must invalidate this sink pin. + sink_pins_affected.push_back(clb_pin); + } + } +} + static e_move_result assess_swap(double delta_c, double t) { /* Returns: 1 -> move accepted, 0 -> rejected. 
*/ if (delta_c <= 0) { @@ -1973,13 +2000,15 @@ static void comp_td_connection_delays(const PlaceDelayModel* delay_model) { } } -//Copy all the current setup slacks from the PlacerSetupSlacks class -//This routine will always be incremental and correct, as it is called -//if and only if the PlacerSetupSlacks class is updated with new slack values -static void record_setup_slacks(const PlacerSetupSlacks* setup_slacks) { +//Commit all the setup slack values from the PlacerSetupSlacks class. +//This routine will be incremental and correct if and only if +//it is called immediately after each time +//update_setup_slacks_and_criticalities updates the setup slacks +//i.e. do_update_setup_slacks = true +static void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks) { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - //Only go through sink pins with modified setup slack + //Incremental: only go through sink pins with modified setup slack for (ClusterPinId pin_id : setup_slacks->pins_with_modified_setup_slack()) { ClusterNetId net_id = clb_nlist.pin_net(pin_id); size_t pin_index_in_net = clb_nlist.pin_net_index(pin_id); @@ -2068,31 +2097,21 @@ static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { // //Relies on proposed_connection_delay and connection_delay to detect //which connections have actually had their delay changed. -static void invalidate_affected_connection_delays(const t_pl_blocks_to_be_moved& blocks_affected, +static void invalidate_affected_connection_delays(const std::vector& sink_pins_affected, ClusteredPinTimingInvalidator* pin_tedges_invalidator, TimingInfo* timing_info) { VTR_ASSERT_SAFE(timing_info); VTR_ASSERT_SAFE(pin_tedges_invalidator); - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& clb_nlist = cluster_ctx.clb_nlist; - - //Inalidate timing graph edges affected by the move - for (ClusterPinId pin : blocks_affected.affected_pins) { - //It is possible that some connections may not have changed delay.(e.g. 
+ //Invalidate timing graph edges affected by the move + for (ClusterPinId clb_pin : sink_pins_affected) { + //It is possible that some connections may not have changed delay. //For instance, if using a dx/dy delay model, this could occur if a sink //moved to a new position with the same dx/dy from it's driver. // - //To minimze work during the incremental STA update we do not invalidate + //To minimize work during the incremental STA update we do not invalidate //such unchanged connections. - - ClusterNetId net = clb_nlist.pin_net(pin); - int ipin = clb_nlist.pin_net_index(pin); - - if (proposed_connection_delay[net][ipin] != connection_delay[net][ipin]) { - //Delay changed, must invalidate - pin_tedges_invalidator->invalidate_connection(pin, timing_info); - } + pin_tedges_invalidator->invalidate_connection(clb_pin, timing_info); } } @@ -3235,59 +3254,3 @@ static void init_annealing_state(t_annealing_state* state, bool placer_needs_lookahead(const t_vpr_setup& vpr_setup) { return (vpr_setup.PlacerOpts.place_algorithm == PATH_TIMING_DRIVEN_PLACE); } - -//Recording down all the info about the placer's current state -static void take_placement_snapshot() { - const auto& place_ctx = g_vpr_ctx.placement(); - const auto& cluster_ctx = g_vpr_ctx.clustering(); - - const auto& clb_nlist = cluster_ctx.clb_nlist; - - connection_delay_snapshot = connection_delay; - //Go through every single sink pin to check if delay has been updated - for (ClusterNetId net_id : clb_nlist.nets()) { - for (size_t ipin = 1; ipin < clb_nlist.net_pins(net_id).size(); ++ipin) { - VTR_ASSERT_MSG(connection_delay[net_id][ipin] == connection_delay_snapshot[net_id][ipin], - "Direct assignment of the delay has failed"); - } - } - - connection_timing_cost_snapshot = connection_timing_cost; - bb_coords_snapshot = bb_coords; - bb_num_on_edges_snapshot = bb_num_on_edges; - net_cost_snapshot = net_cost; - bb_updated_before_snapshot = bb_updated_before; - - block_locs_snapshot = place_ctx.block_locs; - 
grid_blocks_snapshot = place_ctx.grid_blocks; -} - -//Revert back to the recorded placer state, which is the state -//of the placer when the last timing info update took place -static void revert_placement_snapshot(ClusteredPinTimingInvalidator* pin_tedges_invalidator, TimingInfo* timing_info) { - auto& place_ctx = g_vpr_ctx.mutable_placement(); - const auto& cluster_ctx = g_vpr_ctx.clustering(); - - const auto& clb_nlist = cluster_ctx.clb_nlist; - - //Go through every single sink pin to check if delay has changed - for (ClusterNetId net_id : clb_nlist.nets()) { - for (size_t ipin = 1; ipin < clb_nlist.net_pins(net_id).size(); ++ipin) { - if (connection_delay[net_id][ipin] != connection_delay_snapshot[net_id][ipin]) { - //Delay changed, must invalidate - ClusterPinId pin_id = clb_nlist.net_pin(net_id, ipin); - pin_tedges_invalidator->invalidate_connection(pin_id, timing_info); - connection_delay[net_id][ipin] = connection_delay_snapshot[net_id][ipin]; - } - } - } - - connection_timing_cost = connection_timing_cost_snapshot; - bb_coords = bb_coords_snapshot; - bb_num_on_edges = bb_num_on_edges_snapshot; - net_cost = net_cost_snapshot; - bb_updated_before = bb_updated_before_snapshot; - - place_ctx.block_locs = block_locs_snapshot; - place_ctx.grid_blocks = grid_blocks_snapshot; -} \ No newline at end of file From 96e65ba3fc6ccffd6191da0736fb256fd36d8fb3 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Fri, 14 Aug 2020 05:33:03 -0400 Subject: [PATCH 12/24] Corrected the timing update and reversion of setup slack analysis during the placement quench stage. Made commit_td_cost method incremental by only going through sink pins affected by the moved blocks. 
--- vpr/src/place/place.cpp | 261 +++++++++++++++++++++------------------- 1 file changed, 135 insertions(+), 126 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 258c23875aa..1d1f17832f5 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -119,6 +119,14 @@ struct t_annealing_state { int move_lim; // Current move limit }; +/* Determines if slacks/criticalities need to be updated */ +static bool do_update_criticalities = true; +static bool do_update_setup_slacks = true; + +/* Determines if slacks/criticalities need to be recomputed from scratch */ +bool do_recompute_criticalities = true; +bool do_recompute_setup_slacks = true; + constexpr float INVALID_DELAY = std::numeric_limits::quiet_NaN(); constexpr double MAX_INV_TIMING_COST = 1.e9; @@ -182,14 +190,6 @@ static vtr::vector net_timing_cost; //Like connection_timi static vtr::vector bb_coords, bb_num_on_edges; -/* Determines if slacks/criticalities need to be updated */ -static bool do_update_criticalities = true; -static bool do_update_setup_slacks = true; - -/* Determines if slacks/criticalities need to be recomputed from scratch */ -static bool do_recompute_criticalities = true; -static bool do_recompute_setup_slacks = true; - /* The arrays below are used to precompute the inverse of the average * * number of tracks per channel between [subhigh] and [sublow]. Access * * them as chan?_place_cost_fac[subhigh][sublow]. 
They are used to * @@ -417,6 +417,8 @@ static double sum_td_costs(); static void find_affected_sink_pins(const t_pl_blocks_to_be_moved& blocks_affected, std::vector& sink_pins_affected); +static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks); + static e_move_result assess_swap(double delta_c, double t); static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new); @@ -487,8 +489,6 @@ static void placement_inner_loop(float t, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, - bool inner_loop_update_crit, - bool inner_loop_update_setup_slack, MoveGenerator& move_generator, t_pl_blocks_to_be_moved& blocks_affected, SetupTimingInfo* timing_info, @@ -524,8 +524,6 @@ static void print_resources_utilization(); static void init_annealing_state(t_annealing_state* state, const t_annealing_sched& annealing_sched, float t, float rlim, int move_lim_max, float crit_exponent); -static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks); - /*****************************************************************************/ void try_place(const t_placer_opts& placer_opts, t_annealing_sched annealing_sched, @@ -648,6 +646,16 @@ void try_place(const t_placer_opts& placer_opts, do_update_setup_slacks = true; do_recompute_criticalities = true; do_recompute_setup_slacks = true; + + //As a safety measure, for the first time update, + //invalidate all timing edges via the pin invalidator. 
+ auto& clb_nlist = cluster_ctx.clb_nlist; + for (ClusterNetId net : clb_nlist.nets()) { + for (ClusterPinId pin : clb_nlist.net_sinks(net)) { + pin_timing_invalidator.get()->invalidate_connection(pin, timing_info.get()); + } + } + update_setup_slacks_and_criticalities(first_crit_exponent, place_delay_model.get(), placer_criticalities.get(), @@ -816,8 +824,6 @@ void try_place(const t_placer_opts& placer_opts, pin_timing_invalidator.get(), timing_info.get()); - bool anneal_update_crit = true, anneal_update_setup_slack = false; - placement_inner_loop(state.t, num_temps, state.rlim, placer_opts, state.move_lim, state.crit_exponent, inner_recompute_limit, &stats, &costs, @@ -827,8 +833,6 @@ void try_place(const t_placer_opts& placer_opts, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), - anneal_update_crit, - anneal_update_setup_slack, *move_generator, blocks_affected, timing_info.get(), @@ -882,8 +886,14 @@ void try_place(const t_placer_opts& placer_opts, state.t = 0; /* freeze out */ - //Analyze setup slacks for quench - bool quench_update_crit = true, quench_update_setup_slack = true; + //Use setup slack analysis if the placer is timing driven + //TODO: make this a command line option to turn on slack analysis + enum e_place_algorithm quench_algorithm; + if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + quench_algorithm = SETUP_SLACK_ANALYSIS_PLACE; + } else { + quench_algorithm = BOUNDING_BOX_PLACE; + } /* Run inner loop again with temperature = 0 so as to accept only swaps * which reduce the cost of the placement */ @@ -896,12 +906,10 @@ void try_place(const t_placer_opts& placer_opts, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), - quench_update_crit, - quench_update_setup_slack, *move_generator, blocks_affected, timing_info.get(), - e_place_algorithm::SETUP_SLACK_ANALYSIS_PLACE); + quench_algorithm); tot_iter += move_lim; ++num_temps; @@ -1116,8 +1124,6 @@ static void 
placement_inner_loop(float t, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, - bool inner_loop_update_crit, - bool inner_loop_update_setup_slack, MoveGenerator& move_generator, t_pl_blocks_to_be_moved& blocks_affected, SetupTimingInfo* timing_info, @@ -1136,7 +1142,11 @@ static void placement_inner_loop(float t, /* Inner loop begins */ for (inner_iter = 0; inner_iter < move_lim; inner_iter++) { - e_move_result swap_result = try_swap(t, crit_exponent, costs, prev_inverse_costs, rlim, + e_move_result swap_result = try_swap(t, + crit_exponent, + costs, + prev_inverse_costs, + rlim, move_generator, timing_info, pin_timing_invalidator, @@ -1162,7 +1172,7 @@ static void placement_inner_loop(float t, num_swap_rejected++; } - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || place_algorithm == SETUP_SLACK_ANALYSIS_PLACE) { /* Do we want to re-timing analyze the circuit to get updated slack and criticality values? * We do this only once in a while, since it is expensive. */ @@ -1177,8 +1187,8 @@ static void placement_inner_loop(float t, * criticalities and update the timing cost since it will change. 
*/ //Update timing information - do_update_criticalities = inner_loop_update_crit; - do_update_setup_slacks = inner_loop_update_setup_slack; + do_update_criticalities = true; + do_update_setup_slacks = true; update_setup_slacks_and_criticalities(crit_exponent, delay_model, criticalities, @@ -1186,11 +1196,8 @@ static void placement_inner_loop(float t, pin_timing_invalidator, timing_info, costs); - - //Commit the setup slacks if they are updated - if (do_update_setup_slacks) { - commit_setup_slacks(setup_slacks); - } + //Commit the setup slacks + commit_setup_slacks(setup_slacks); } inner_crit_iter_count++; } @@ -1225,35 +1232,6 @@ static void placement_inner_loop(float t, /* Inner loop ends */ } -static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks) { - const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& clb_nlist = cluster_ctx.clb_nlist; - - //Find the original/proposed setup slacks of pins with modified values - std::vector original_setup_slacks, proposed_setup_slacks; - - for (ClusterPinId clb_pin : setup_slacks->pins_with_modified_setup_slack()) { - ClusterNetId net_id = clb_nlist.pin_net(clb_pin); - size_t ipin = clb_nlist.pin_net_index(clb_pin); - - original_setup_slacks.push_back(connection_setup_slack[net_id][ipin]); - proposed_setup_slacks.push_back(setup_slacks->setup_slack(net_id, ipin)); - } - - //If there are no pins with modified slack values, accept this move - //by returning an arbitrary negative number - if (original_setup_slacks.empty()) { - return -1; - } - - //Sort in ascending order, from worse slack value to best - std::sort(original_setup_slacks.begin(), original_setup_slacks.end()); - std::sort(proposed_setup_slacks.begin(), proposed_setup_slacks.end()); - - //Compare if the worse slack value has gotten worse or better - return original_setup_slacks.front() - proposed_setup_slacks.front(); -} - static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const PlaceDelayModel* delay_model, 
const PlacerCriticalities* criticalities, @@ -1426,11 +1404,12 @@ static float starting_t(float crit_exponent, av = 0.; sum_of_squares = 0.; - /* Try one move per block. Set t high so essentially all accepted. */ + /* Try one move per block. Set the temperature high so essentially all accepted. */ + float t = HUGE_POSITIVE_FLOAT; for (i = 0; i < move_lim; i++) { //Will not deploy setup slack analysis, so omit crit_exponenet and setup_slack - e_move_result swap_result = try_swap(HUGE_POSITIVE_FLOAT, + e_move_result swap_result = try_swap(t, crit_exponent, costs, prev_inverse_costs, @@ -1582,6 +1561,8 @@ static e_move_result try_swap(float t, //Find all the nets affected by this swap and update their costs //This routine calculates new connection delays and timing costs //and store them in proposed_* data structures + //This routine also calculates the wiring cost, which doesn't + //depend on the timing driven data int num_nets_affected = find_affected_nets_and_update_costs(place_algorithm, delay_model, criticalities, @@ -1589,8 +1570,9 @@ static e_move_result try_swap(float t, bb_delta_c, timing_delta_c); - //Find all the sink pins with changed connection delays from the affected blocks - //These sink pins will be passed into the pin_timing_invalidator for sta update + //Find all the sink pins with changed connection delays from the affected blocks. + //These sink pins will be passed into the pin_timing_invalidator for timing update. + //They will also be added to the pin invalidator when we wish to revert a timing update. std::vector sink_pins_affected; find_affected_sink_pins(blocks_affected, sink_pins_affected); @@ -1601,9 +1583,18 @@ static e_move_result try_swap(float t, pin_timing_invalidator, timing_info); - //Update timing information. Only update setup slacks. 
- //Keep the connection criticalities and timing costs stale - //so as not to mess up the original timing driven algorithm + //Update the connection_timing_cost and connection_delay + //values from the temporary values. + commit_td_cost(blocks_affected); + + //Update timing information. Since we are analyzing setup slacks, + //we only update those values and keep the criticalities stale + //so as not to interfere with the original timing driven algorithm. + // + //Note: the timing info must be called after applying block moves + //and committing the timing driven delays and costs. + //If we wish to revert this timing update due to move rejection, + //we need to revert block moves and restore the timing values. do_update_criticalities = false; do_update_setup_slacks = true; update_setup_slacks_and_criticalities(crit_exponent, @@ -1614,6 +1605,8 @@ static e_move_result try_swap(float t, timing_info, costs); + /* Get the setup slack analysis cost */ + //TODO: calculate a weighted average of the slack cost and wiring cost delta_c = analyze_setup_slack_cost(setup_slacks); } else if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { @@ -1623,7 +1616,8 @@ static e_move_result try_swap(float t, delta_c = (1 - timing_tradeoff) * bb_delta_c * prev_inverse_costs->bb_cost + timing_tradeoff * timing_delta_c * prev_inverse_costs->timing_cost; - } else { + + } else { //place_algorithm == BOUNDING_BOX_PLACE (wiring cost) delta_c = bb_delta_c; } @@ -1635,16 +1629,12 @@ static e_move_result try_swap(float t, costs->bb_cost += bb_delta_c; if (place_algorithm == SETUP_SLACK_ANALYSIS_PLACE) { + /* Update the timing driven cost as usual */ costs->timing_cost += timing_delta_c; //Commit the setup slack information + //The timing delay and cost values should be committed already commit_setup_slacks(setup_slacks); - - //Update the connection_timing_cost and connection_delay - //values from the temporary values. 
- //The connections have already been invalidated and updated - //during the previous analysis stage. - commit_td_cost(blocks_affected); } if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { @@ -1669,20 +1659,27 @@ static e_move_result try_swap(float t, /* Update clb data structures since we kept the move. */ commit_move_blocks(blocks_affected); - } else { /* Move was rejected. */ - /* Reset the net cost function flags first. */ + } else { //move_outcome == REJECTED + + /* Reset the net cost function flags first. */ reset_move_nets(num_nets_affected); /* Restore the place_ctx.block_locs data structures to their state before the move. */ revert_move_blocks(blocks_affected); if (place_algorithm == SETUP_SLACK_ANALYSIS_PLACE) { + //Revert the timing delays and costs to pre-update values + //These routines must be called after reverting the block moves + //TODO: make this process incremental + comp_td_connection_delays(delay_model); + comp_td_costs(delay_model, *criticalities, &costs->timing_cost); + //Re-invalidate the affected sink pins invalidate_affected_connection_delays(sink_pins_affected, pin_timing_invalidator, timing_info); - /* Blocks are restored. 
Now Restore the timing information to pre-analysis state */ + /* Revert the timing update */ do_update_criticalities = false; do_update_setup_slacks = true; update_setup_slacks_and_criticalities(crit_exponent, @@ -1692,15 +1689,14 @@ static e_move_result try_swap(float t, pin_timing_invalidator, timing_info, costs); - /* - * VTR_ASSERT_MSG( - * verify_connection_setup_slacks(setup_slacks), - * "The setup slacks should not change after reverting to state before the timing info update."); - */ - revert_td_cost(blocks_affected); + + VTR_ASSERT_SAFE_MSG( + verify_connection_setup_slacks(setup_slacks), + "The current setup slacks should be identical to the values before the try swap timing info update."); } if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + /* Unstage the values stored in proposed_* data structures */ revert_td_cost(blocks_affected); } } @@ -1728,7 +1724,7 @@ static e_move_result try_swap(float t, check_place(*costs, delay_model, place_algorithm); #endif - return (move_outcome); + return move_outcome; } //Puts all the nets changed by the current swap into nets_to_update, @@ -1768,7 +1764,7 @@ static int find_affected_nets_and_update_costs(e_place_algorithm place_algorithm //once per net, not once per pin. 
update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin); - if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (place_algorithm == PATH_TIMING_DRIVEN_PLACE || place_algorithm == SETUP_SLACK_ANALYSIS_PLACE) { //Determine the change in timing costs if required update_td_delta_costs(delay_model, *criticalities, net_id, blk_pin, blocks_affected, timing_delta_c); } @@ -1899,6 +1895,41 @@ static void find_affected_sink_pins(const t_pl_blocks_to_be_moved& blocks_affect } } +static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& clb_nlist = cluster_ctx.clb_nlist; + + //Find the original/proposed setup slacks of pins with modified values + std::vector original_setup_slacks, proposed_setup_slacks; + + auto clb_pins_modified = setup_slacks->pins_with_modified_setup_slack(); + for (ClusterPinId clb_pin : clb_pins_modified) { + ClusterNetId net_id = clb_nlist.pin_net(clb_pin); + size_t ipin = clb_nlist.pin_net_index(clb_pin); + + original_setup_slacks.push_back(connection_setup_slack[net_id][ipin]); + proposed_setup_slacks.push_back(setup_slacks->setup_slack(net_id, ipin)); + } + + //Sort in ascending order, from worse slack value to best + std::sort(original_setup_slacks.begin(), original_setup_slacks.end()); + std::sort(proposed_setup_slacks.begin(), proposed_setup_slacks.end()); + + //Check the first pair of slack values that are different + //If found, return their difference + for (size_t idiff = 0; idiff < original_setup_slacks.size(); ++idiff) { + float slack_diff = original_setup_slacks[idiff] - proposed_setup_slacks[idiff]; + + if (slack_diff != 0) { + return slack_diff; + } + } + + //If all slack values are identical(or no modified slack values), + //reject this move by returning an arbitrary positive number as cost + return 1; +} + static e_move_result assess_swap(double delta_c, double t) { /* Returns: 1 -> move accepted, 0 -> rejected.
*/ if (delta_c <= 0) { @@ -2001,15 +2032,15 @@ static void comp_td_connection_delays(const PlaceDelayModel* delay_model) { } //Commit all the setup slack values from the PlacerSetupSlacks class. -//This routine will be incremental and correct if and only if -//it is called immediately after each time -//update_setup_slacks_and_criticalities updates the setup slacks -//i.e. do_update_setup_slacks = true +//This incremental routine will be correct if and only if it is called +//immediately after each time update_setup_slacks_and_criticalities +//updates the setup slacks (i.e. do_update_setup_slacks = true) static void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks) { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; //Incremental: only go through sink pins with modified setup slack - for (ClusterPinId pin_id : setup_slacks->pins_with_modified_setup_slack()) { + auto clb_pins_modified = setup_slacks->pins_with_modified_setup_slack(); + for (ClusterPinId pin_id : clb_pins_modified) { ClusterNetId net_id = clb_nlist.pin_net(pin_id); size_t pin_index_in_net = clb_nlist.pin_net_index(pin_id); @@ -2020,7 +2051,7 @@ static void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks) { static bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks) { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; - //Go through every single sink pin + //Go through every single sink pin to check that the slack values are the same for (ClusterNetId net_id : clb_nlist.nets()) { for (size_t ipin = 1; ipin < clb_nlist.net_pins(net_id).size(); ++ipin) { if (connection_setup_slack[net_id][ipin] != setup_slacks->setup_slack(net_id, ipin)) { @@ -2033,44 +2064,22 @@ static bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks } /* Update the connection_timing_cost values from the temporary * - * values for all connections that have changed. */ + * values for all connections that have changed. 
*/ static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& clb_nlist = cluster_ctx.clb_nlist; - /* Go through all the blocks moved. */ - for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; iblk++) { - ClusterBlockId bnum = blocks_affected.moved_blocks[iblk].block_num; - for (ClusterPinId pin_id : cluster_ctx.clb_nlist.block_pins(bnum)) { - ClusterNetId net_id = cluster_ctx.clb_nlist.pin_net(pin_id); - - if (cluster_ctx.clb_nlist.net_is_ignored(net_id)) - continue; - - if (cluster_ctx.clb_nlist.pin_type(pin_id) == PinType::DRIVER) { - //This net is being driven by a moved block, recompute - //all point to point connections on this net. - for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) { - connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin]; - proposed_connection_delay[net_id][ipin] = INVALID_DELAY; - connection_timing_cost[net_id][ipin] = proposed_connection_timing_cost[net_id][ipin]; - proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; - } - } else { - //This pin is a net sink on a moved block - VTR_ASSERT_SAFE(cluster_ctx.clb_nlist.pin_type(pin_id) == PinType::SINK); - - /* The following "if" prevents the value from being updated twice. 
*/ - if (!driven_by_moved_block(net_id, blocks_affected)) { - int net_pin = cluster_ctx.clb_nlist.pin_net_index(pin_id); + //Go through all the sink pins affected + for (ClusterPinId pin_id : blocks_affected.affected_pins) { + ClusterNetId net_id = clb_nlist.pin_net(pin_id); + int ipin = clb_nlist.pin_net_index(pin_id); - connection_delay[net_id][net_pin] = proposed_connection_delay[net_id][net_pin]; - proposed_connection_delay[net_id][net_pin] = INVALID_DELAY; - connection_timing_cost[net_id][net_pin] = proposed_connection_timing_cost[net_id][net_pin]; - proposed_connection_timing_cost[net_id][net_pin] = INVALID_DELAY; - } - } - } /* Finished going through all the pins in the moved block */ - } /* Finished going through all the blocks moved */ + //Commit the timing delay and cost values + connection_delay[net_id][ipin] = proposed_connection_delay[net_id][ipin]; + proposed_connection_delay[net_id][ipin] = INVALID_DELAY; + connection_timing_cost[net_id][ipin] = proposed_connection_timing_cost[net_id][ipin]; + proposed_connection_timing_cost[net_id][ipin] = INVALID_DELAY; + } } //Reverts modifications to proposed_connection_delay and proposed_connection_timing_cost based on From 112bde57b797764dc9694bfbcb64ec92dcfde9e5 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Fri, 14 Aug 2020 07:07:47 -0400 Subject: [PATCH 13/24] Moved four boolean global variables controlling the timing update into a new local structure called t_placer_timing_update_mode to tidy up the code. 
--- vpr/src/place/place.cpp | 187 ++++++++++++++++++++++++++-------------- 1 file changed, 122 insertions(+), 65 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 1d1f17832f5..b6045c3139c 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -119,13 +119,15 @@ struct t_annealing_state { int move_lim; // Current move limit }; -/* Determines if slacks/criticalities need to be updated */ -static bool do_update_criticalities = true; -static bool do_update_setup_slacks = true; - -/* Determines if slacks/criticalities need to be recomputed from scratch */ -bool do_recompute_criticalities = true; -bool do_recompute_setup_slacks = true; +struct t_placer_timing_update_mode { + /* Determines if slacks/criticalities need to be updated */ + bool do_update_criticalities; + bool do_update_setup_slacks; + + /* Determines if slacks/criticalities need to be recomputed from scratch */ + bool do_recompute_criticalities; + bool do_recompute_setup_slacks; +}; constexpr float INVALID_DELAY = std::numeric_limits::quiet_NaN(); @@ -332,6 +334,7 @@ static void reset_move_nets(int num_nets_affected); static e_move_result try_swap(float t, float crit_exponent, + t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, float rlim, @@ -360,6 +363,7 @@ static int check_block_placement_consistency(); static int check_macro_placement_consistency(); static float starting_t(float crit_exponent, + t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, t_annealing_sched annealing_sched, @@ -455,6 +459,7 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr); static void free_try_swap_arrays(); static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, + t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, int 
num_connections, @@ -466,12 +471,22 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info); +static void initialize_timing_info(float crit_exponent, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info, + t_placer_timing_update_mode* timing_update_mode, + t_placer_costs* costs); + static void update_setup_slacks_and_criticalities(float crit_exponent, const PlaceDelayModel* delay_model, PlacerCriticalities* criticalities, PlacerSetupSlacks* setup_slacks, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info, + t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs); static void placement_inner_loop(float t, @@ -482,6 +497,7 @@ static void placement_inner_loop(float t, float crit_exponent, int inner_recompute_limit, t_placer_statistics* stats, + t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, int* moves_since_cost_recompute, @@ -569,6 +585,7 @@ void try_place(const t_placer_opts& placer_opts, std::unique_ptr pin_timing_invalidator; t_pl_blocks_to_be_moved blocks_affected(cluster_ctx.clb_nlist.blocks().size()); + t_placer_timing_update_mode timing_update_mode; /* Allocated here because it goes into timing critical code where each memory allocation is expensive */ IntraLbPbPinLookup pb_gpin_lookup(device_ctx.logical_block_types); @@ -642,32 +659,14 @@ void try_place(const t_placer_opts& placer_opts, atom_ctx.lookup, *timing_info->timing_graph()); //First time compute timing and costs, compute from scratch - do_update_criticalities = true; - do_update_setup_slacks = true; - do_recompute_criticalities = true; - do_recompute_setup_slacks = true; - - //As a safety measure, for the first time update, - 
//invalidate all timing edges via the pin invalidator. - auto& clb_nlist = cluster_ctx.clb_nlist; - for (ClusterNetId net : clb_nlist.nets()) { - for (ClusterPinId pin : clb_nlist.net_sinks(net)) { - pin_timing_invalidator.get()->invalidate_connection(pin, timing_info.get()); - } - } - - update_setup_slacks_and_criticalities(first_crit_exponent, - place_delay_model.get(), - placer_criticalities.get(), - placer_setup_slacks.get(), - pin_timing_invalidator.get(), - timing_info.get(), - &costs); - - //Initialize the setup slacks matrix - commit_setup_slacks(placer_setup_slacks.get()); - - timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during placement + initialize_timing_info(first_crit_exponent, + place_delay_model.get(), + placer_criticalities.get(), + placer_setup_slacks.get(), + pin_timing_invalidator.get(), + timing_info.get(), + &timing_update_mode, + &costs); critical_path = timing_info->least_slack_critical_path(); @@ -780,7 +779,8 @@ void try_place(const t_placer_opts& placer_opts, first_rlim = (float)max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); - float first_t = starting_t(first_crit_exponent, &costs, &prev_inverse_costs, + float first_t = starting_t(first_crit_exponent, &timing_update_mode, + &costs, &prev_inverse_costs, annealing_sched, move_lim, first_rlim, place_delay_model.get(), placer_criticalities.get(), @@ -814,7 +814,8 @@ void try_place(const t_placer_opts& placer_opts, costs.cost = 1; } - outer_loop_update_timing_info(placer_opts, &costs, &prev_inverse_costs, + outer_loop_update_timing_info(placer_opts, &timing_update_mode, + &costs, &prev_inverse_costs, num_connections, state.crit_exponent, &outer_crit_iter_count, @@ -826,8 +827,7 @@ void try_place(const t_placer_opts& placer_opts, placement_inner_loop(state.t, num_temps, state.rlim, placer_opts, state.move_lim, state.crit_exponent, inner_recompute_limit, &stats, - &costs, - &prev_inverse_costs, + &timing_update_mode, &costs, 
&prev_inverse_costs, &moves_since_cost_recompute, pin_timing_invalidator.get(), place_delay_model.get(), @@ -873,8 +873,8 @@ void try_place(const t_placer_opts& placer_opts, { /* Quench */ vtr::ScopedFinishTimer temperature_timer("Placement Quench"); - outer_loop_update_timing_info(placer_opts, &costs, - &prev_inverse_costs, + outer_loop_update_timing_info(placer_opts, &timing_update_mode, + &costs, &prev_inverse_costs, num_connections, state.crit_exponent, &outer_crit_iter_count, @@ -899,8 +899,7 @@ void try_place(const t_placer_opts& placer_opts, * which reduce the cost of the placement */ placement_inner_loop(state.t, num_temps, state.rlim, placer_opts, move_lim, state.crit_exponent, quench_recompute_limit, &stats, - &costs, - &prev_inverse_costs, + &timing_update_mode, &costs, &prev_inverse_costs, &moves_since_cost_recompute, pin_timing_invalidator.get(), place_delay_model.get(), @@ -958,16 +957,19 @@ void try_place(const t_placer_opts& placer_opts, VTR_ASSERT(timing_info); //Update timing and costs - do_update_criticalities = true; - do_update_setup_slacks = false; + timing_update_mode.do_update_criticalities = true; + timing_update_mode.do_update_setup_slacks = true; update_setup_slacks_and_criticalities(state.crit_exponent, place_delay_model.get(), placer_criticalities.get(), placer_setup_slacks.get(), pin_timing_invalidator.get(), timing_info.get(), + &timing_update_mode, &costs); + commit_setup_slacks(placer_setup_slacks.get()); + critical_path = timing_info->least_slack_critical_path(); if (isEchoFileEnabled(E_ECHO_FINAL_PLACEMENT_TIMING_GRAPH)) { @@ -1021,6 +1023,7 @@ void try_place(const t_placer_opts& placer_opts, /* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, + t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, int num_connections, @@ -1031,8 +1034,9 
@@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, PlacerSetupSlacks* setup_slacks, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info) { - if (placer_opts.place_algorithm != PATH_TIMING_DRIVEN_PLACE) + if (placer_opts.place_algorithm != PATH_TIMING_DRIVEN_PLACE) { return; + } /*at each temperature change we update these values to be used */ /*for normalizing the tradeoff between timing and wirelength (bb) */ @@ -1044,15 +1048,16 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, num_connections = std::max(num_connections, 1); //Avoid division by zero VTR_ASSERT(num_connections > 0); - //Update timing information and criticalities - do_update_criticalities = true; - do_update_setup_slacks = true; + //Update all timing information + timing_update_mode->do_update_criticalities = true; + timing_update_mode->do_update_setup_slacks = true; update_setup_slacks_and_criticalities(crit_exponent, delay_model, criticalities, setup_slacks, pin_timing_invalidator, timing_info, + timing_update_mode, costs); //Always commit the setup slacks when they are updated @@ -1069,6 +1074,50 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, prev_inverse_costs->timing_cost = min(1 / costs->timing_cost, MAX_INV_TIMING_COST); } +static void initialize_timing_info(float crit_exponent, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info, + t_placer_timing_update_mode* timing_update_mode, + t_placer_costs* costs) { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& clb_nlist = cluster_ctx.clb_nlist; + + //Initialize the timing update mode. 
Update both + //setup slacks and criticalities from scratch + timing_update_mode->do_update_criticalities = true; + timing_update_mode->do_update_setup_slacks = true; + timing_update_mode->do_recompute_criticalities = true; + timing_update_mode->do_recompute_setup_slacks = true; + + //As a safety measure, for the first time update, + //invalidate all timing edges via the pin invalidator + //by passing in all the clb sink pins + for (ClusterNetId net_id : clb_nlist.nets()) { + for (ClusterPinId pin_id : clb_nlist.net_sinks(net_id)) { + pin_timing_invalidator->invalidate_connection(pin_id, timing_info); + } + } + + //Perform timing info update + update_setup_slacks_and_criticalities(crit_exponent, + delay_model, + criticalities, + setup_slacks, + pin_timing_invalidator, + timing_info, + timing_update_mode, + costs); + + //Initialize the data structure that stores committed placer setup slacks + commit_setup_slacks(setup_slacks); + + //Don't warn again about unconstrained nodes again during placement + timing_info->set_warn_unconstrained(false); +} + //Update timing information based on current placement by running STA. 
//Record the new slack information as well as calculate the updated //criticalities and timing costs (based on the new setup slacks) @@ -1078,18 +1127,19 @@ static void update_setup_slacks_and_criticalities(float crit_exponent, PlacerSetupSlacks* setup_slacks, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info, + t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs) { //Run STA to update slacks and adjusted/relaxed criticalities timing_info->update(); - if (do_update_setup_slacks) { + if (timing_update_mode->do_update_setup_slacks) { //Update placer's setup slacks - setup_slacks->update_setup_slacks(timing_info, do_recompute_setup_slacks); + setup_slacks->update_setup_slacks(timing_info, timing_update_mode->do_recompute_setup_slacks); } - if (do_update_criticalities) { + if (timing_update_mode->do_update_criticalities) { //Update placer's criticalities (e.g. sharpen with crit_exponent) - criticalities->update_criticalities(timing_info, crit_exponent, do_recompute_criticalities); + criticalities->update_criticalities(timing_info, crit_exponent, timing_update_mode->do_recompute_criticalities); //Update connection, net and total timing costs based on new criticalities #ifdef INCR_COMP_TD_COSTS @@ -1099,10 +1149,11 @@ static void update_setup_slacks_and_criticalities(float crit_exponent, #endif } - //Setup slacks and criticalities need to be in sync with the timing_info - //Otherwise, they cannot be incrementally updated on the next iteration - do_recompute_setup_slacks = !do_update_setup_slacks; - do_recompute_criticalities = !do_update_criticalities; + //Setup slacks and criticalities need to be in sync with the timing_info. + //if they are to be incrementally updated on the next iteration. + //Otherwise, a re-computation for all clb sink pins is required. 
+ timing_update_mode->do_recompute_setup_slacks = !timing_update_mode->do_update_setup_slacks; + timing_update_mode->do_recompute_criticalities = !timing_update_mode->do_update_criticalities; //Clear invalidation state pin_timing_invalidator->reset(); @@ -1117,6 +1168,7 @@ static void placement_inner_loop(float t, float crit_exponent, int inner_recompute_limit, t_placer_statistics* stats, + t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, int* moves_since_cost_recompute, @@ -1144,6 +1196,7 @@ static void placement_inner_loop(float t, for (inner_iter = 0; inner_iter < move_lim; inner_iter++) { e_move_result swap_result = try_swap(t, crit_exponent, + timing_update_mode, costs, prev_inverse_costs, rlim, @@ -1172,7 +1225,7 @@ static void placement_inner_loop(float t, num_swap_rejected++; } - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE || place_algorithm == SETUP_SLACK_ANALYSIS_PLACE) { + if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { /* Do we want to re-timing analyze the circuit to get updated slack and criticality values? * We do this only once in a while, since it is expensive. */ @@ -1184,19 +1237,20 @@ static void placement_inner_loop(float t, VTR_LOG("Inner loop recompute criticalities\n"); #endif /* Using the delays in connection_delay, do a timing analysis to update slacks and - * criticalities and update the timing cost since it will change. + * criticalities and update the timing cost since they will change. 
*/ - //Update timing information - do_update_criticalities = true; - do_update_setup_slacks = true; + timing_update_mode->do_update_criticalities = true; + timing_update_mode->do_update_setup_slacks = true; update_setup_slacks_and_criticalities(crit_exponent, delay_model, criticalities, setup_slacks, pin_timing_invalidator, timing_info, + timing_update_mode, costs); - //Commit the setup slacks + + //Always commit the setup slacks when they are updated commit_setup_slacks(setup_slacks); } inner_crit_iter_count++; @@ -1375,6 +1429,7 @@ static bool update_annealing_state(t_annealing_state* state, } static float starting_t(float crit_exponent, + t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, t_annealing_sched annealing_sched, @@ -1411,6 +1466,7 @@ static float starting_t(float crit_exponent, //Will not deploy setup slack analysis, so omit crit_exponenet and setup_slack e_move_result swap_result = try_swap(t, crit_exponent, + timing_update_mode, costs, prev_inverse_costs, rlim, @@ -1486,6 +1542,7 @@ static void reset_move_nets(int num_nets_affected) { static e_move_result try_swap(float t, float crit_exponent, + t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, float rlim, @@ -1595,14 +1652,15 @@ static e_move_result try_swap(float t, //and committing the timing driven delays and costs. //If we wish to revert this timing update due to move rejection, //we need to revert block moves and restore the timing values. 
- do_update_criticalities = false; - do_update_setup_slacks = true; + timing_update_mode->do_update_criticalities = false; + timing_update_mode->do_update_setup_slacks = true; update_setup_slacks_and_criticalities(crit_exponent, delay_model, criticalities, setup_slacks, pin_timing_invalidator, timing_info, + timing_update_mode, costs); /* Get the setup slack analysis cost */ @@ -1680,14 +1738,13 @@ static e_move_result try_swap(float t, timing_info); /* Revert the timing update */ - do_update_criticalities = false; - do_update_setup_slacks = true; update_setup_slacks_and_criticalities(crit_exponent, delay_model, criticalities, setup_slacks, pin_timing_invalidator, timing_info, + timing_update_mode, costs); VTR_ASSERT_SAFE_MSG( From 29b55a317c7299cafc612304c6356608efcfbe08 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Thu, 20 Aug 2020 17:22:01 -0400 Subject: [PATCH 14/24] Added vpr option --place_quench_metric to turn on/off setup slack analysis during placement quench. Possible options are: auto, timing_cost, setup_slack. 
--- vpr/src/base/SetupVPR.cpp | 2 ++ vpr/src/base/read_options.cpp | 46 +++++++++++++++++++++++++++++++++++ vpr/src/base/read_options.h | 1 + vpr/src/base/vpr_types.h | 7 ++++++ vpr/src/place/place.cpp | 29 ++++++++++++++++------ 5 files changed, 78 insertions(+), 7 deletions(-) diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index c10609e6857..546fdc2f029 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -570,6 +570,8 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts) PlacerOpts->effort_scaling = Options.place_effort_scaling; PlacerOpts->timing_update_type = Options.timing_update_type; + + PlacerOpts->place_quench_metric = Options.place_quench_metric; } static void SetupAnalysisOpts(const t_options& Options, t_analysis_opts& analysis_opts) { diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index b40e867f672..76bd56d0126 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -959,6 +959,41 @@ struct ParseTimingUpdateType { } }; +struct ParsePlaceQuenchMetric { + ConvertedValue from_str(std::string str) { + ConvertedValue conv_value; + if (str == "auto") + conv_value.set_value(e_place_quench_metric::AUTO); + else if (str == "timing_cost") + conv_value.set_value(e_place_quench_metric::TIMING_COST); + else if (str == "setup_slack") + conv_value.set_value(e_place_quench_metric::SETUP_SLACK); + else { + std::stringstream msg; + msg << "Invalid conversion from '" << str << "' to e_place_quench_metric (expected one of: " << argparse::join(default_choices(), ", ") << ")"; + conv_value.set_error(msg.str()); + } + return conv_value; + } + + ConvertedValue to_str(e_place_quench_metric val) { + ConvertedValue conv_value; + if (val == e_place_quench_metric::AUTO) + conv_value.set_value("auto"); + else if (val == e_place_quench_metric::TIMING_COST) + conv_value.set_value("timing_cost"); + else { + VTR_ASSERT(val == e_place_quench_metric::SETUP_SLACK); +
conv_value.set_value("setup_slack"); + } + return conv_value; + } + + std::vector default_choices() { + return {"auto", "timing_cost", "setup_slack"}; + } +}; + argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& args) { std::string description = "Implements the specified circuit onto the target FPGA architecture" @@ -1747,6 +1782,17 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg .default_value("") .show_in(argparse::ShowIn::HELP_ONLY); + place_timing_grp.add_argument(args.place_quench_metric, "--place_quench_metric") + .help( + "Controls which cost function the placer uses during the quench stage:\n" + " * auto: VPR decides\n" + " * timing_cost: The same cost formulation as the one used during\n" + " the annealing stage (more stable)\n" + " * setup_slack: Directly uses setup slacks (in combination with wiring)\n" + " to check if the block moves should be accepted\n") + .default_value("auto") + .show_in(argparse::ShowIn::HELP_ONLY); + auto& route_grp = parser.add_argument_group("routing options"); route_grp.add_argument(args.max_router_iterations, "--max_router_iterations") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index e3e1307823e..5964904072a 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -130,6 +130,7 @@ struct t_options { argparse::ArgValue place_delay_model; argparse::ArgValue place_delay_model_reducer; argparse::ArgValue allowed_tiles_for_delay_model; + argparse::ArgValue place_quench_metric; /* Router Options */ argparse::ArgValue check_rr_graph; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 34f08d250f6..9019dacba91 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -885,6 +885,12 @@ enum class e_place_delta_delay_algorithm { DIJKSTRA_EXPANSION, }; +enum class e_place_quench_metric { + TIMING_COST, + SETUP_SLACK, + AUTO +}; + struct t_placer_opts { enum e_place_algorithm place_algorithm; 
float timing_tradeoff; @@ -933,6 +939,7 @@ struct t_placer_opts { std::string allowed_tiles_for_delay_model; e_place_delta_delay_algorithm place_delta_delay_matrix_calculation_method; + e_place_quench_metric place_quench_metric; }; /* All the parameters controlling the router's operation are in this * diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index b6045c3139c..5df9a124473 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -540,6 +540,8 @@ static void print_resources_utilization(); static void init_annealing_state(t_annealing_state* state, const t_annealing_sched& annealing_sched, float t, float rlim, int move_lim_max, float crit_exponent); +static e_place_algorithm get_placement_quench_algorithm(const t_placer_opts& placer_opts); + /*****************************************************************************/ void try_place(const t_placer_opts& placer_opts, t_annealing_sched annealing_sched, @@ -887,13 +889,9 @@ void try_place(const t_placer_opts& placer_opts, state.t = 0; /* freeze out */ //Use setup slack analysis if the placer is timing driven - //TODO: make this a command line option to turn on slack analysis - enum e_place_algorithm quench_algorithm; - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { - quench_algorithm = SETUP_SLACK_ANALYSIS_PLACE; - } else { - quench_algorithm = BOUNDING_BOX_PLACE; - } + //and the quench metric is SETUP_SLACK. 
Otherwise, use the + //same cost formulation as the annealing stage + auto quench_algorithm = get_placement_quench_algorithm(placer_opts); /* Run inner loop again with temperature = 0 so as to accept only swaps * which reduce the cost of the placement */ @@ -3317,6 +3315,23 @@ static void init_annealing_state(t_annealing_state* state, state->crit_exponent = crit_exponent; } +static e_place_algorithm get_placement_quench_algorithm(const t_placer_opts& placer_opts) { + e_place_algorithm place_algo = placer_opts.place_algorithm; + e_place_quench_metric quench_metric = placer_opts.place_quench_metric; + + if (place_algo == e_place_algorithm::PATH_TIMING_DRIVEN_PLACE) { + if (quench_metric == e_place_quench_metric::AUTO || quench_metric == e_place_quench_metric::TIMING_COST) { + return PATH_TIMING_DRIVEN_PLACE; + } else { + VTR_ASSERT(quench_metric == e_place_quench_metric::SETUP_SLACK); + return SETUP_SLACK_ANALYSIS_PLACE; + } + } else { + VTR_ASSERT(place_algo == e_place_algorithm::BOUNDING_BOX_PLACE); + return BOUNDING_BOX_PLACE; + } +} + bool placer_needs_lookahead(const t_vpr_setup& vpr_setup) { return (vpr_setup.PlacerOpts.place_algorithm == PATH_TIMING_DRIVEN_PLACE); } From f641b668e82c6e08e831c305f72ba1d8b097aaab Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Fri, 28 Aug 2020 01:18:41 -0400 Subject: [PATCH 15/24] Added the option --place_quench_metric to VPR documentation on VPR command-line options. --- doc/src/vpr/command_line_usage.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst index d4cb178bca3..e462f1b42a3 100644 --- a/doc/src/vpr/command_line_usage.rst +++ b/doc/src/vpr/command_line_usage.rst @@ -862,6 +862,16 @@ The following options are only valid when the placement engine is in timing-driv Name of the post-placement timing report file to generate (not generated if unspecfied). +.. 
option:: --place_quench_metric {auto, timing_cost, setup_slack} + + Specifies which cost formulation the placer uses during the quench stage. + + * ``auto`` VPR makes the choice. Currently, VPR uses ``timing_cost`` by default. + * ``timing_cost`` Use the timing cost: connection delay * criticality. + * ``setup_slack`` Directly checks the raw setup slack returned by the timing analyzer. + + **Default:** ``auto`` + .. _router_options: Router Options From d88a38dfdc4d322f6a0db41db38ea953622ee2a6 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Fri, 28 Aug 2020 04:38:18 -0400 Subject: [PATCH 16/24] Moved timing_update_mode boolean variables into PlacerSetupSlacks and PlacerCriticalities and cleaned up related code in placer routines. Enhanced documentation requested by PR comments. --- vpr/src/place/place.cpp | 362 +++++++++++++++++++-------------- vpr/src/place/timing_place.cpp | 141 +++++++++---- vpr/src/place/timing_place.h | 334 ++++++++++++++++++++---------- vpr/src/timing/timing_util.cpp | 23 ++- 4 files changed, 539 insertions(+), 321 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 5df9a124473..7fb89b70b15 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -119,16 +119,6 @@ struct t_annealing_state { int move_lim; // Current move limit }; -struct t_placer_timing_update_mode { - /* Determines if slacks/criticalities need to be updated */ - bool do_update_criticalities; - bool do_update_setup_slacks; - - /* Determines if slacks/criticalities need to be recomputed from scratch */ - bool do_recompute_criticalities; - bool do_recompute_setup_slacks; -}; - constexpr float INVALID_DELAY = std::numeric_limits::quiet_NaN(); constexpr double MAX_INV_TIMING_COST = 1.e9; @@ -334,7 +324,6 @@ static void reset_move_nets(int num_nets_affected); static e_move_result try_swap(float t, float crit_exponent, - t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs,
float rlim, @@ -363,7 +352,6 @@ static int check_block_placement_consistency(); static int check_macro_placement_consistency(); static float starting_t(float crit_exponent, - t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, t_annealing_sched annealing_sched, @@ -459,7 +447,6 @@ static double get_net_wirelength_estimate(ClusterNetId net_id, t_bb* bbptr); static void free_try_swap_arrays(); static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, - t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, int num_connections, @@ -477,17 +464,25 @@ static void initialize_timing_info(float crit_exponent, PlacerSetupSlacks* setup_slacks, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info, - t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs); -static void update_setup_slacks_and_criticalities(float crit_exponent, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info, - t_placer_timing_update_mode* timing_update_mode, - t_placer_costs* costs); +static void update_timing_classes(float crit_exponent, + SetupTimingInfo* timing_info, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator); + +static void update_timing_cost(const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + double* timing_cost); + +static void perform_full_timing_update(float crit_exponent, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info, + t_placer_costs* costs); static void placement_inner_loop(float t, int 
temp_num, @@ -497,7 +492,6 @@ static void placement_inner_loop(float t, float crit_exponent, int inner_recompute_limit, t_placer_statistics* stats, - t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, int* moves_since_cost_recompute, @@ -587,7 +581,6 @@ void try_place(const t_placer_opts& placer_opts, std::unique_ptr pin_timing_invalidator; t_pl_blocks_to_be_moved blocks_affected(cluster_ctx.clb_nlist.blocks().size()); - t_placer_timing_update_mode timing_update_mode; /* Allocated here because it goes into timing critical code where each memory allocation is expensive */ IntraLbPbPinLookup pb_gpin_lookup(device_ctx.logical_block_types); @@ -667,7 +660,6 @@ void try_place(const t_placer_opts& placer_opts, placer_setup_slacks.get(), pin_timing_invalidator.get(), timing_info.get(), - &timing_update_mode, &costs); critical_path = timing_info->least_slack_critical_path(); @@ -781,7 +773,7 @@ void try_place(const t_placer_opts& placer_opts, first_rlim = (float)max(device_ctx.grid.width() - 1, device_ctx.grid.height() - 1); - float first_t = starting_t(first_crit_exponent, &timing_update_mode, + float first_t = starting_t(first_crit_exponent, &costs, &prev_inverse_costs, annealing_sched, move_lim, first_rlim, place_delay_model.get(), @@ -816,7 +808,7 @@ void try_place(const t_placer_opts& placer_opts, costs.cost = 1; } - outer_loop_update_timing_info(placer_opts, &timing_update_mode, + outer_loop_update_timing_info(placer_opts, &costs, &prev_inverse_costs, num_connections, state.crit_exponent, @@ -829,7 +821,7 @@ void try_place(const t_placer_opts& placer_opts, placement_inner_loop(state.t, num_temps, state.rlim, placer_opts, state.move_lim, state.crit_exponent, inner_recompute_limit, &stats, - &timing_update_mode, &costs, &prev_inverse_costs, + &costs, &prev_inverse_costs, &moves_since_cost_recompute, pin_timing_invalidator.get(), place_delay_model.get(), @@ -875,7 +867,7 @@ void try_place(const 
t_placer_opts& placer_opts, { /* Quench */ vtr::ScopedFinishTimer temperature_timer("Placement Quench"); - outer_loop_update_timing_info(placer_opts, &timing_update_mode, + outer_loop_update_timing_info(placer_opts, &costs, &prev_inverse_costs, num_connections, state.crit_exponent, @@ -897,7 +889,7 @@ void try_place(const t_placer_opts& placer_opts, * which reduce the cost of the placement */ placement_inner_loop(state.t, num_temps, state.rlim, placer_opts, move_lim, state.crit_exponent, quench_recompute_limit, &stats, - &timing_update_mode, &costs, &prev_inverse_costs, + &costs, &prev_inverse_costs, &moves_since_cost_recompute, pin_timing_invalidator.get(), place_delay_model.get(), @@ -953,20 +945,13 @@ void try_place(const t_placer_opts& placer_opts, if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { //Final timing estimate VTR_ASSERT(timing_info); - - //Update timing and costs - timing_update_mode.do_update_criticalities = true; - timing_update_mode.do_update_setup_slacks = true; - update_setup_slacks_and_criticalities(state.crit_exponent, - place_delay_model.get(), - placer_criticalities.get(), - placer_setup_slacks.get(), - pin_timing_invalidator.get(), - timing_info.get(), - &timing_update_mode, - &costs); - - commit_setup_slacks(placer_setup_slacks.get()); + perform_full_timing_update(state.crit_exponent, + place_delay_model.get(), + placer_criticalities.get(), + placer_setup_slacks.get(), + pin_timing_invalidator.get(), + timing_info.get(), + &costs); critical_path = timing_info->least_slack_critical_path(); @@ -1021,7 +1006,6 @@ void try_place(const t_placer_opts& placer_opts, /* Function to update the setup slacks and criticalities before the inner loop of the annealing/quench */ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, - t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, int num_connections, @@ -1046,20 +1030,14 @@ static void 
outer_loop_update_timing_info(const t_placer_opts& placer_opts, num_connections = std::max(num_connections, 1); //Avoid division by zero VTR_ASSERT(num_connections > 0); - //Update all timing information - timing_update_mode->do_update_criticalities = true; - timing_update_mode->do_update_setup_slacks = true; - update_setup_slacks_and_criticalities(crit_exponent, - delay_model, - criticalities, - setup_slacks, - pin_timing_invalidator, - timing_info, - timing_update_mode, - costs); - - //Always commit the setup slacks when they are updated - commit_setup_slacks(setup_slacks); + //Update all timing related classes + perform_full_timing_update(crit_exponent, + delay_model, + criticalities, + setup_slacks, + pin_timing_invalidator, + timing_info, + costs); *outer_crit_iter_count = 0; } @@ -1078,18 +1056,10 @@ static void initialize_timing_info(float crit_exponent, PlacerSetupSlacks* setup_slacks, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info, - t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs) { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& clb_nlist = cluster_ctx.clb_nlist; - //Initialize the timing update mode. 
Update both - //setup slacks and criticalities from scratch - timing_update_mode->do_update_criticalities = true; - timing_update_mode->do_update_setup_slacks = true; - timing_update_mode->do_recompute_criticalities = true; - timing_update_mode->do_recompute_setup_slacks = true; - //As a safety measure, for the first time update, //invalidate all timing edges via the pin invalidator //by passing in all the clb sink pins @@ -1099,62 +1069,116 @@ static void initialize_timing_info(float crit_exponent, } } - //Perform timing info update - update_setup_slacks_and_criticalities(crit_exponent, - delay_model, - criticalities, - setup_slacks, - pin_timing_invalidator, - timing_info, - timing_update_mode, - costs); - - //Initialize the data structure that stores committed placer setup slacks - commit_setup_slacks(setup_slacks); + //Perform first time update for all timing related classes + perform_full_timing_update(crit_exponent, + delay_model, + criticalities, + setup_slacks, + pin_timing_invalidator, + timing_info, + costs); //Don't warn again about unconstrained nodes again during placement timing_info->set_warn_unconstrained(false); } -//Update timing information based on current placement by running STA. -//Record the new slack information as well as calculate the updated -//criticalities and timing costs (based on the new setup slacks) -static void update_setup_slacks_and_criticalities(float crit_exponent, - const PlaceDelayModel* delay_model, - PlacerCriticalities* criticalities, - PlacerSetupSlacks* setup_slacks, - ClusteredPinTimingInvalidator* pin_timing_invalidator, - SetupTimingInfo* timing_info, - t_placer_timing_update_mode* timing_update_mode, - t_placer_costs* costs) { - //Run STA to update slacks and adjusted/relaxed criticalities +/** + * @brief Update timing information based on the current block positions. + * + * Run STA to update the timing info class. 
+ * + * Update the values stored in PlacerCriticalities and PlacerSetupSlacks + * if they are enabled to update. To enable updating, call their respective + * enable_update() method. See their documentation for more detailed info. + * + * If criticalities are updated, the timing driven costs should be updated + * as well by calling update_timing_cost(). Calling this routine to update + * timing_cost will produce round-off error in the long run due to its + * incremental nature, so the timing cost value will be recomputed once in + * a while, via other timing driven routines. + * + * If setup slacks are updated, then normally they should be committed to + * `connection_setup_slack` via commit_setup_slacks() routine. However, + * sometimes new setup slack values are not committed immediately if we + * expect to revert the current timing update in the near future, or if + * we wish to compare the new slack values to the original ones. + * + * All the pins with changed connection delays have already been added into + * the ClusteredPinTimingInvalidator to allow incremental STA update. These + * changed connection delays are a direct result of moved blocks in try_swap(). + */ +static void update_timing_classes(float crit_exponent, + SetupTimingInfo* timing_info, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator) { + /* Run STA to update slacks and adjusted/relaxed criticalities. */ timing_info->update(); - if (timing_update_mode->do_update_setup_slacks) { - //Update placer's setup slacks - setup_slacks->update_setup_slacks(timing_info, timing_update_mode->do_recompute_setup_slacks); - } + /* Update the placer's criticalities (e.g. sharpen with crit_exponent). */ + criticalities->update_criticalities(timing_info, crit_exponent); + + /* Update the placer's raw setup slacks. 
*/ + setup_slacks->update_setup_slacks(timing_info); - if (timing_update_mode->do_update_criticalities) { - //Update placer's criticalities (e.g. sharpen with crit_exponent) - criticalities->update_criticalities(timing_info, crit_exponent, timing_update_mode->do_recompute_criticalities); + /* Clear invalidation state. */ + pin_timing_invalidator->reset(); +} - //Update connection, net and total timing costs based on new criticalities +/** + * @brief Update the timing driven (td) costs. + * + * This routine either uses incremental update_td_costs(), or updates + * from scratch using comp_td_costs(). By default, it is incremental + * by iterating over the set of clustered netlist connections/pins + * returned by PlacerCriticalities::pins_with_modified_criticality(). + * + * Hence, this routine should always be called when PlacerCriticalities + * is enabled to be updated in update_timing_classes(). Otherwise, the + * incremental method will no longer be correct. + */ +static void update_timing_cost(const PlaceDelayModel* delay_model, + const PlacerCriticalities* criticalities, + double* timing_cost) { #ifdef INCR_COMP_TD_COSTS - update_td_costs(delay_model, *criticalities, &costs->timing_cost); + update_td_costs(delay_model, *criticalities, timing_cost); #else - comp_td_costs(delay_model, *criticalities, &costs->timing_cost); + comp_td_costs(delay_model, *criticalities, timing_cost); #endif - } - - //Setup slacks and criticalities need to be in sync with the timing_info. - //if they are to be incrementally updated on the next iteration. - //Otherwise, a re-computation for all clb sink pins is required. - timing_update_mode->do_recompute_setup_slacks = !timing_update_mode->do_update_setup_slacks; - timing_update_mode->do_recompute_criticalities = !timing_update_mode->do_update_criticalities; +} - //Clear invalidation state - pin_timing_invalidator->reset(); +/** + * @brief Updates all timing-related classes, variables and structures.
+ * + * This routine exists to reduce code duplication, as the placer routines + * often require updating every timing related stuff. + * + * Updates: SetupTimingInfo, PlacerCriticalities, PlacerSetupSlacks, + * timing_cost, connection_setup_slack. + */ +static void perform_full_timing_update(float crit_exponent, + const PlaceDelayModel* delay_model, + PlacerCriticalities* criticalities, + PlacerSetupSlacks* setup_slacks, + ClusteredPinTimingInvalidator* pin_timing_invalidator, + SetupTimingInfo* timing_info, + t_placer_costs* costs) { + /* Update all timing related classes. */ + criticalities->enable_update(); + setup_slacks->enable_update(); + update_timing_classes(crit_exponent, + timing_info, + criticalities, + setup_slacks, + pin_timing_invalidator); + + /* Update the timing cost with new connection criticalities. */ + update_timing_cost(delay_model, + criticalities, + &costs->timing_cost); + + /* Commit the setup slacks since they are updated. */ + commit_setup_slacks(setup_slacks); } /* Function which contains the inner loop of the simulated annealing */ @@ -1166,7 +1190,6 @@ static void placement_inner_loop(float t, float crit_exponent, int inner_recompute_limit, t_placer_statistics* stats, - t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, int* moves_since_cost_recompute, @@ -1194,7 +1217,6 @@ static void placement_inner_loop(float t, for (inner_iter = 0; inner_iter < move_lim; inner_iter++) { e_move_result swap_result = try_swap(t, crit_exponent, - timing_update_mode, costs, prev_inverse_costs, rlim, @@ -1234,22 +1256,14 @@ static void placement_inner_loop(float t, #ifdef VERBOSE VTR_LOG("Inner loop recompute criticalities\n"); #endif - /* Using the delays in connection_delay, do a timing analysis to update slacks and - * criticalities and update the timing cost since they will change. 
- */ - timing_update_mode->do_update_criticalities = true; - timing_update_mode->do_update_setup_slacks = true; - update_setup_slacks_and_criticalities(crit_exponent, - delay_model, - criticalities, - setup_slacks, - pin_timing_invalidator, - timing_info, - timing_update_mode, - costs); - - //Always commit the setup slacks when they are updated - commit_setup_slacks(setup_slacks); + //Update all timing related classes + perform_full_timing_update(crit_exponent, + delay_model, + criticalities, + setup_slacks, + pin_timing_invalidator, + timing_info, + costs); } inner_crit_iter_count++; } @@ -1427,7 +1441,6 @@ static bool update_annealing_state(t_annealing_state* state, } static float starting_t(float crit_exponent, - t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, t_annealing_sched annealing_sched, @@ -1464,7 +1477,6 @@ static float starting_t(float crit_exponent, //Will not deploy setup slack analysis, so omit crit_exponenet and setup_slack e_move_result swap_result = try_swap(t, crit_exponent, - timing_update_mode, costs, prev_inverse_costs, rlim, @@ -1540,7 +1552,6 @@ static void reset_move_nets(int num_nets_affected) { static e_move_result try_swap(float t, float crit_exponent, - t_placer_timing_update_mode* timing_update_mode, t_placer_costs* costs, t_placer_prev_inverse_costs* prev_inverse_costs, float rlim, @@ -1646,20 +1657,17 @@ static e_move_result try_swap(float t, //we only update those values and keep the criticalities stale //so as not to interfere with the original timing driven algorithm. // - //Note: the timing info must be called after applying block moves + //Note: the timing info must be updated after applying block moves //and committing the timing driven delays and costs. //If we wish to revert this timing update due to move rejection, //we need to revert block moves and restore the timing values. 
- timing_update_mode->do_update_criticalities = false; - timing_update_mode->do_update_setup_slacks = true; - update_setup_slacks_and_criticalities(crit_exponent, - delay_model, - criticalities, - setup_slacks, - pin_timing_invalidator, - timing_info, - timing_update_mode, - costs); + criticalities->disable_update(); + setup_slacks->enable_update(); + update_timing_classes(crit_exponent, + timing_info, + criticalities, + setup_slacks, + pin_timing_invalidator); /* Get the setup slack analysis cost */ //TODO: calculate a weighted average of the slack cost and wiring cost @@ -1736,14 +1744,11 @@ static e_move_result try_swap(float t, timing_info); /* Revert the timing update */ - update_setup_slacks_and_criticalities(crit_exponent, - delay_model, - criticalities, - setup_slacks, - pin_timing_invalidator, - timing_info, - timing_update_mode, - costs); + update_timing_classes(crit_exponent, + timing_info, + criticalities, + setup_slacks, + pin_timing_invalidator); VTR_ASSERT_SAFE_MSG( verify_connection_setup_slacks(setup_slacks), @@ -1950,6 +1955,24 @@ static void find_affected_sink_pins(const t_pl_blocks_to_be_moved& blocks_affect } } +/** + * @brief Check if the setup slack has gotten better or worse due to block swap. + * + * Get all the modified slack values via the PlacerSetupSlacks class, and compare + * them with the original values at these connections. Sort them and compare them + * one by one, and return the difference of the first different pair. + * + * If the new slack value is larger (better), then return a negative value so that + * the move will be accepted. If the new slack value is smaller (worse), return a + * positive value so that the move will be rejected. + * + * If no slack values have changed, then return an arbitrary positive number. A + * move resulting in no change in the slack values should probably be unnecessary.
+ * + * The sorting is needed to guard against the unlikely circumstance that a bad slack + * value suddenly got very good due to the block move, while a good slack value + * got very bad, perhaps even worse than the original worst slack value. + */ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks) { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& clb_nlist = cluster_ctx.clb_nlist; @@ -1966,7 +1989,7 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks) { proposed_setup_slacks.push_back(setup_slacks->setup_slack(net_id, ipin)); } - //Sort in ascending order, from worse slack value to best + //Sort in ascending order, from the worst slack value to the best std::sort(original_setup_slacks.begin(), original_setup_slacks.end()); std::sort(proposed_setup_slacks.begin(), proposed_setup_slacks.end()); @@ -1980,8 +2003,8 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks) { } } - //If all slack values are identical(or no modified slack values), - //reject this move by returning an arbitrary positive number as cost + //If all slack values are identical (or no modified slack values), + //reject this move by returning an arbitrary positive number as cost. return 1; } @@ -2086,10 +2109,23 @@ static void comp_td_connection_delays(const PlaceDelayModel* delay_model) { } } -//Commit all the setup slack values from the PlacerSetupSlacks class. -//This incremental routine will be correct if and only if it is called -//immediately after each time update_setup_slacks_and_criticalities -//updates the setup slacks (i.e. do_update_setup_slacks = true) +/** + * @brief Commit all the setup slack values from the PlacerSetupSlacks + * class to `connection_setup_slack`. + * + * This routine is incremental since it relies on the pins_with_modified_setup_slack() + * to detect which pins need to be updated and which pins do not.
+ * + * Therefore, it is assumed that this routine is always called immediately after + * each time update_timing_classes() is called with setup slack update enabled. + * Otherwise, pins_with_modified_setup_slack() cannot accurately account for all + * the pins that have their setup slacks changed, making this routine incorrect. + * + * Currently, the only exception to the rule above is when setup slack analysis is used + * during the placement quench. The new setup slacks might be either accepted or + * rejected, so for efficiency reasons, this routine is not called if the slacks are + * rejected in the end. For more detailed info, see the try_swap() routine. + */ static void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks) { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; @@ -2103,6 +2139,17 @@ static void commit_setup_slacks(const PlacerSetupSlacks* setup_slacks) { } } +/** + * @brief Verify that the values in `connection_setup_slack` matches PlacerSetupSlacks. + * + * Return true if all connection values are identical. Otherwise, return false. + * + * Currently, this routine is called to check if the timing update has been successfully + * reverted after a proposed move is rejected when applying setup slack analysis during + * the placement quench. If successful, the setup slacks in PlacerSetupSlacks should be + * the same as the values in `connection_setup_slack` without running commit_setup_slacks(). + * For more detailed info, see the try_swap() routine. 
+ */ static bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks) { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; @@ -2114,7 +2161,6 @@ static bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks } } } - return true; } diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp index f7d940dfd5f..0c06c134709 100644 --- a/vpr/src/place/timing_place.cpp +++ b/vpr/src/place/timing_place.cpp @@ -1,3 +1,8 @@ +/** + * @file timing_place.cpp + * @brief Stores the method definitions of classes defined in timing_place.h. + */ + #include #include @@ -14,35 +19,41 @@ #include "timing_info.h" -//Use an incremental approach to updating criticalities and setup slacks? -constexpr bool INCR_UPDATE_CRITICALITIES = true; -constexpr bool INCR_UPDATE_SETUP_SLACKS = true; - -/**************************************/ +///@brief Use an incremental approach to updating criticalities and setup slacks? +static constexpr bool INCR_UPDATE_CRITICALITIES = true, INCR_UPDATE_SETUP_SLACKS = true; -/* Allocates space for the timing_place_crit_ data structure * - * I chunk the data to save space on large problems. */ +///@brief Allocates space for the timing_place_crit_ data structure. PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup) : clb_nlist_(clb_nlist) , pin_lookup_(netlist_pin_lookup) , timing_place_crit_(make_net_pins_matrix(clb_nlist_, std::numeric_limits::quiet_NaN())) { } -void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_info, float crit_exponent, bool recompute) { - //If the criticalities are not updated immediately after each time we call - //timing_info->update(), then timing_info->pins_with_modified_setup_criticality() - //cannot accurately account for all the pins that need to be updated. - //In this case, we pass in recompute=true to update all criticalities from scratch. 
- // - //If the criticality exponent has changed, we also need to update from scratch. +/** + * @brief Update the criticalities in the timing_place_crit_ data structure. + * + * If the criticalities are not updated immediately after each time we call + * timing_info->update(), then timing_info->pins_with_modified_setup_criticality() + * cannot accurately account for all the pins that need to be updated. In this case, + * `recompute_required` would be true, and we update all criticalities from scratch. + * + * If the criticality exponent has changed, we also need to update from scratch. + */ +void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_info, float crit_exponent) { + /* If update is not enabled, exit the routine. */ + if (!update_enabled) { + /* re-computation is required on the next iteration */ + recompute_required = true; + return; + } - //Determine what pins need updating - if (!recompute && crit_exponent == last_crit_exponent_ && INCR_UPDATE_CRITICALITIES) { + /* Determine what pins need updating */ + if (!recompute_required && crit_exponent == last_crit_exponent_ && INCR_UPDATE_CRITICALITIES) { incr_update_criticalities(timing_info); } else { recompute_criticalities(); - //Record new criticality exponent + /* Record new criticality exponent */ last_crit_exponent_ = crit_exponent; } @@ -50,7 +61,7 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf * For every pin on every net (or, equivalently, for every tedge ending * in that pin), timing_place_crit_ = criticality^(criticality exponent) */ - // Update the effected pins + /* Update the affected pins */ for (ClusterPinId clb_pin : cluster_pins_with_modified_criticality_) { ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); @@ -62,20 +73,27 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf * criticality by taking it to some power, crit_exponent (between 1
and 8 by default). */ timing_place_crit_[clb_net][pin_index_in_net] = pow(clb_pin_crit, crit_exponent); } + + /* Criticalities updated. In sync with timing info. */ + /* Can be incrementally updated on the next iteration */ + recompute_required = false; } +/** + * @brief Collect the cluster pins which need to be updated based on the latest timing + * analysis so that incremental updates to criticalities can be performed. + * + * Note we use the set of pins reported by the *timing_info* as having modified + * criticality, rather than those marked as modified by the timing analyzer. + * + * Since timing_info uses shifted/relaxed criticality (which depends on max required + * time and worst case slacks), additional nodes may be modified when updating the + * atom pin criticalities. + */ + void PlacerCriticalities::incr_update_criticalities(const SetupTimingInfo* timing_info) { cluster_pins_with_modified_criticality_.clear(); - //Collect the cluster pins which need to be updated based on the latest timing - //analysis - // - //Note we use the set of pins reported by the *timing_info* as having modified - //criticality, rather than those marked as modified by the timing analyzer. - //Since timing_info uses shifted/relaxed criticality (which depends on max - //required time and worst case slacks), additional nodes may be modified - //when updating the atom pin criticalities. - for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_criticality()) { ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); @@ -88,10 +106,15 @@ void PlacerCriticalities::incr_update_criticalities(const SetupTimingInfo* timin } } +/** + * @brief Collect all the sink pins in the netlist and prepare them for update. + * + * For the incremental version, see PlacerCriticalities::incr_update_criticalities().
+ */ void PlacerCriticalities::recompute_criticalities() { cluster_pins_with_modified_criticality_.clear(); - //Non-incremental: all sink pins need updating + /* Non-incremental: all sink pins need updating */ for (ClusterNetId net_id : clb_nlist_.nets()) { for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { cluster_pins_with_modified_criticality_.insert(pin_id); @@ -99,35 +122,54 @@ void PlacerCriticalities::recompute_criticalities() { } } +///@brief Override the criticality of a particular connection. void PlacerCriticalities::set_criticality(ClusterNetId net_id, int ipin, float val) { timing_place_crit_[net_id][ipin] = val; } +/** + * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) which + * were modified by the last call to PlacerCriticalities::update_criticalities(). + */ PlacerCriticalities::pin_range PlacerCriticalities::pins_with_modified_criticality() const { return vtr::make_range(cluster_pins_with_modified_criticality_); } /**************************************/ -/* Allocates space for the timing_place_setup_slacks_ data structure */ +///@brief Allocates space for the timing_place_setup_slacks_ data structure. PlacerSetupSlacks::PlacerSetupSlacks(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup) : clb_nlist_(clb_nlist) , pin_lookup_(netlist_pin_lookup) , timing_place_setup_slacks_(make_net_pins_matrix(clb_nlist_, std::numeric_limits::quiet_NaN())) { } -void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info, bool recompute) { - //If the setup slacks are not updated immediately after each time we call - //timing_info->update(), then timing_info->pins_with_modified_setup_slack() - //cannot accurately account for all the pins that need to be updated. - //In this case, we pass in recompute=true to update all setup slacks from scratch. 
- if (!recompute && INCR_UPDATE_SETUP_SLACKS) { +/** + * @brief Updated the setup slacks in the timing_place_setup_slacks_ data structure. + * + * If the setup slacks are not updated immediately after each time we call + * timing_info->update(), then timing_info->pins_with_modified_setup_slack() + * cannot accurately account for all the pins that need to be updated. + * + * In this case, `recompute_required` would be true, and we update all setup slacks + * from scratch. + */ +void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info) { + /* If update is not enabled, exit the routine. */ + if (!update_enabled) { + /* re-computation is required on the next iteration */ + recompute_required = true; + return; + } + + /* Determine what pins need updating */ + if (!recompute_required && INCR_UPDATE_SETUP_SLACKS) { incr_update_setup_slacks(timing_info); } else { recompute_setup_slacks(); } - //Update the effected pins + /* Update the affected pins */ for (ClusterPinId clb_pin : cluster_pins_with_modified_setup_slack_) { ClusterNetId clb_net = clb_nlist_.pin_net(clb_pin); int pin_index_in_net = clb_nlist_.pin_net_index(clb_pin); @@ -136,15 +178,22 @@ void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info, timing_place_setup_slacks_[clb_net][pin_index_in_net] = clb_pin_setup_slack; } + + /* Setup slacks updated. In sync with timing info. */ + /* Can be incrementally updated on the next iteration. */ + recompute_required = false; } +/** + * @brief Collect the cluster pins which need to be updated based on the latest timing + * analysis so that incremental updates to setup slacks can be performed. + * + * Note we use the set of pins reported by the *timing_info* as having modified + * setup slacks, rather than those marked as modified by the timing analyzer. 
+ */ void PlacerSetupSlacks::incr_update_setup_slacks(const SetupTimingInfo* timing_info) { cluster_pins_with_modified_setup_slack_.clear(); - //Collect the cluster pins which need to be updated based on the latest timing analysis - // - //Note we use the set of pins reported by the *timing_info* as having modified - //setup slacks, rather than those marked as modified by the timing analyzer. for (AtomPinId atom_pin : timing_info->pins_with_modified_setup_slack()) { ClusterPinId clb_pin = pin_lookup_.connected_clb_pin(atom_pin); @@ -157,10 +206,15 @@ void PlacerSetupSlacks::incr_update_setup_slacks(const SetupTimingInfo* timing_i } } +/** + * @brief Collect all the sink pins in the netlist and prepare them for update. + * + * For the incremental version, see PlacerSetupSlacks::incr_update_setup_slacks(). + */ void PlacerSetupSlacks::recompute_setup_slacks() { cluster_pins_with_modified_setup_slack_.clear(); - //Non-incremental: all sink pins need updating + /* Non-incremental: all sink pins need updating */ for (ClusterNetId net_id : clb_nlist_.nets()) { for (ClusterPinId pin_id : clb_nlist_.net_sinks(net_id)) { cluster_pins_with_modified_setup_slack_.insert(pin_id); @@ -168,10 +222,15 @@ void PlacerSetupSlacks::recompute_setup_slacks() { } } +///@brief Override the setup slack of a particular connection. void PlacerSetupSlacks::set_setup_slack(ClusterNetId net_id, int ipin, float val) { timing_place_setup_slacks_[net_id][ipin] = val; } +/** + * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) + * which were modified by the last call to PlacerSetupSlacks::update_setup_slacks().
+ */ PlacerSetupSlacks::pin_range PlacerSetupSlacks::pins_with_modified_setup_slack() const { return vtr::make_range(cluster_pins_with_modified_setup_slack_); } diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h index d37983730f5..6bf6420a429 100644 --- a/vpr/src/place/timing_place.h +++ b/vpr/src/place/timing_place.h @@ -1,3 +1,32 @@ +/** + * @file timing_place.h + * @brief Interface used by the VPR placer to query information + * from the Tatum timing analyzer. + * + * @class PlacerSetupSlacks + * Queries connection **RAW** setup slacks, which can + * range from negative to positive values. Also maps + * atom pin setup slacks to clb pin setup slacks. + * @class PlacerCriticalities + * Query connection criticalities, which are calculated + * based on the raw setup slacks and range from 0 to 1. + * Also maps atom pin crit. to clb pin crit. + * @class PlacerTimingCosts + * Hierarchical structure used by update_td_costs() to + * maintain the order of addition operation of float values + * (to avoid round-offs) while doing incremental updates. + * + * Calculating criticalities: + * All the raw setup slack values across a single clock domain are gathered, shifted, + * and rated from best to worst. The best shifted slack value (the most positive one) + * will have a criticality of 0, while the worst shifted slack value (always 0) + * will have a criticality of 1. Criticalities are used to calculate timing costs + * for each connection (delay * criticality). + * + * For a more detailed description on how criticalities are calculated, see + * calc_relaxed_criticality() in `timing_util.cpp`.
+ */ + #ifndef TIMING_PLACE #define TIMING_PLACE @@ -14,32 +43,45 @@ std::unique_ptr alloc_lookups_and_criticalities(t_chan_width_di std::vector& segment_inf, const t_direct_inf* directs, const int num_directs); -/* Usage + +/** + * @brief PlacerCriticalities returns the clustered netlist connection criticalities + * used by the placer ('sharpened' by a criticality exponent). + * + * Usage * ===== - * PlacerCriticalities returns the clustered netlist connection criticalities used by - * the placer ('sharpened' by a criticality exponent). This also serves to map atom - * netlist level criticalites (i.e. on AtomPinIds) to the clustered netlist (i.e. - * ClusterPinIds) used during placement. + * This class also serves to map atom netlist level criticalites (i.e. on AtomPinIds) + * to the clustered netlist (i.e. ClusterPinIds) used during placement. * - * Criticalities are calculated by calling update_criticalities(), which will - * update criticalities based on the atom netlist connection criticalities provided by - * the passed in SetupTimingInfo. This is done incrementally, based on the modified - * connections/AtomPinIds returned by SetupTimingInfo. + * Criticalities are updated by update_criticalities(), given that `update_enabled` is + * set to true. It will update criticalities based on the atom netlist connection + * criticalities provided by the passed in SetupTimingInfo. * - * The criticalities of individual connections can then be queried by calling the - * criticality() member function. + * This process can be done incrementally, based on the modified connections/AtomPinIds + * returned by SetupTimingInfo. However, the set returned only reflects the connections + * changed by the last call to the timing info update. 
* - * It also supports iterating via pins_with_modified_criticalities() through the - * clustered netlist pins/connections which have had their criticality modified by - * the last call to update_criticalities(), which is useful for incrementally - * re-calculating timing costs. + * Therefore, if SetupTimingInfo is updated twice in succession without criticalities + * getting updated (update_enabled = false), the returned set cannot account for all + * the connections that have been modified, in which case a recomputation is required. + * Hence, each time update_setup_slacks_and_criticalities() is called, we assign + * `recompute_required` the opposite value of `update_enabled`. + * + * This class also maps/transforms the modified atom connections/pins returned by the + * timing info into modified clustered netlist connections/pins after calling + * update_criticalities(). The interface then enables users to iterate over this range + * via pins_with_modified_criticalities(). This is useful for incrementally re-calculating + * the timing costs. + * + * The criticalities of individual connections can then be queried by calling the + * criticality() member function. * * Implementation * ============== - * To support incremental re-calculation the class saves the last criticality exponent - * passed to update_criticalites(). If the next update uses the same exponent criticalities - * can be incrementally updated. Otherwise they must be re-calculated from scratch, since - * a change in exponent changes *all* criticalities. + * To support incremental re-calculation, the class saves the last criticality exponent + * passed to PlacerCriticalities::update_criticalites(). If the next update uses the same + * exponent, criticalities can be incrementally updated. Otherwise, they must be re-calculated + * from scratch, since a change in exponent changes *all* criticalities. 
*/ class PlacerCriticalities { public: //Types @@ -55,55 +97,81 @@ class PlacerCriticalities { PlacerCriticalities& operator=(const PlacerCriticalities& clb_nlist) = delete; public: //Accessors - //Returns the criticality of the specified connection + ///@brief Returns the criticality of the specified connection. float criticality(ClusterNetId net, int ipin) const { return timing_place_crit_[net][ipin]; } - //Returns the range of clustered netlist pins (i.e. ClusterPinIds) which were modified - //by the last call to update_criticalities() + /** + * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) which + * were modified by the last call to PlacerCriticalities::update_criticalities(). + */ pin_range pins_with_modified_criticality() const; public: //Modifiers - //Updates criticalities based on the atom netlist criticalitites provided by - //timing_info and the provided criticality_exponent. - void update_criticalities(const SetupTimingInfo* timing_info, float criticality_exponent, bool recompute); + /** + * @brief Updates criticalities based on the atom netlist criticalities + * provided by timing_info and the provided criticality_exponent. + */ + void update_criticalities(const SetupTimingInfo* timing_info, float criticality_exponent); - //Override the criticality of a particular connection + ///@brief Override the criticality of a particular connection. void set_criticality(ClusterNetId net, int ipin, float val); + ///@brief Set `update_enabled` to true. + void enable_update() { update_enabled = true; } + + ///@brief Set `update_enabled` to false. + void disable_update() { update_enabled = false; } + private: //Data + ///@brief The clb netlist in the placement context. const ClusteredNetlist& clb_nlist_; - const ClusteredPinAtomPinsLookup& pin_lookup_; - ClbNetPinsMatrix timing_place_crit_; /* [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] */ + ///@brief The lookup table that maps atom pins to clb pins.
+ const ClusteredPinAtomPinsLookup& pin_lookup_; - //The criticality exponent when update_criticalites() was last called (used to detect if incremental update can be used) + /** + * @brief The matrix that stores criticality value for each connection. + * + * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] + */ + ClbNetPinsMatrix timing_place_crit_; + + /** + * The criticality exponent when update_criticalites() was last called + * (used to detect if incremental update can be used). + */ float last_crit_exponent_ = std::numeric_limits::quiet_NaN(); - //Set of pins with criticaltites modified by last call to update_criticalities() + ///@brief Set of pins with criticaltites modified by last call to update_criticalities(). vtr::vec_id_set cluster_pins_with_modified_criticality_; - //Updates criticalities: incremental V.S. from scratch + ///@brief Updates criticalities: incremental V.S. from scratch void incr_update_criticalities(const SetupTimingInfo* timing_info); void recompute_criticalities(); + + ///@brief Flag that turns on/off the update_criticalities() routine. + bool update_enabled = true; + + ///@brief Flag that checks if criticalities needs to be recomputed for all connections. + bool recompute_required = true; }; -/* Usage - * ===== - * PlacerSetupSlacks returns the clustered netlist connection setup slack used by - * the placer. This also serves to map atom netlist level slack (i.e. on AtomPinIds) - * to the clustered netlist (i.e. ClusterPinIds) used during placement. +/** + * @brief PlacerSetupSlacks returns the RAW setup slacks of clustered netlist connection. * - * Setup slacks are calculated by calling update_setup_slacks(), which will - * update setup slacks based on the atom netlist connection setup slacks provided by - * the passed in SetupTimingInfo. This is done incrementally, based on the modified - * connections/AtomPinIds returned by SetupTimingInfo. 
+ * Usage + * ===== + * This class mirrors PlacerCriticalities by both its methods and its members. The only + * difference is that this class deals with RAW setup slacks returned by SetupTimingInfo + * rather than criticalities. See the documentation on PlacerCriticalities for more. * - * The setup slacks of individual connections can then be queried by calling the - * setup_slack() member function. + * RAW setup slacks are unlike criticalities. Their values are not confined between + * 0 and 1. Their values can be either positive or negative. * - * It also supports iterating via pins_with_modified_setup_slack() through the - * clustered netlist pins/connections which have had their setup slacks modified by - * the last call to update_setup_slacks(). + * This class also provides iterating over the clustered netlist connections/pins that + * have modified setup slacks by the last call to update_setup_slacks(). However, this + * utility is mainly used for incrementally committing the setup slack values into the + * structure `connection_setup_slack` used by many placer routines. */ class PlacerSetupSlacks { public: //Types @@ -119,40 +187,58 @@ class PlacerSetupSlacks { PlacerSetupSlacks& operator=(const PlacerSetupSlacks& clb_nlist) = delete; public: //Accessors - //Returns the setup slack of the specified connection + ///@brief Returns the setup slack of the specified connection. float setup_slack(ClusterNetId net, int ipin) const { return timing_place_setup_slacks_[net][ipin]; } - //Returns the range of clustered netlist pins (i.e. ClusterPinIds) which were modified - //by the last call to update_setup_slacks() + /** + * @brief Returns the range of clustered netlist pins (i.e. ClusterPinIds) + * which were modified by the last call to PlacerSetupSlacks::update_setup_slacks(). 
+ */ pin_range pins_with_modified_setup_slack() const; public: //Modifiers - //Updates setup slacks based on the atom netlist setup slacks provided by timing_info - void update_setup_slacks(const SetupTimingInfo* timing_info, bool recompute); + ///@brief Updates setup slacks based on the atom netlist setup slacks provided by timing_info. + void update_setup_slacks(const SetupTimingInfo* timing_info); - //Override the setup slack of a particular connection + ///@brief Override the setup slack of a particular connection. void set_setup_slack(ClusterNetId net, int ipin, float val); + ///@brief Set `update_enabled` to true. + void enable_update() { update_enabled = true; } + + ///@brief Set `update_enabled` to false. + void disable_update() { update_enabled = false; } + private: //Data const ClusteredNetlist& clb_nlist_; const ClusteredPinAtomPinsLookup& pin_lookup_; - ClbNetPinsMatrix timing_place_setup_slacks_; /* [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] */ + /** + * @brief The matrix that stores raw setup slack values for each connection. + * + * Index range: [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] + */ + ClbNetPinsMatrix timing_place_setup_slacks_; - //Set of pins with criticaltites modified by last call to update_criticalities() + ///@brief Set of pins with raw setup slacks modified by last call to update_setup_slacks() vtr::vec_id_set cluster_pins_with_modified_setup_slack_; - //Updates setup slacks: incremental V.S. from scratch + ///@brief Updates setup slacks: incremental V.S. from scratch. void incr_update_setup_slacks(const SetupTimingInfo* timing_info); void recompute_setup_slacks(); + + ///@brief Flag that turns on/off the update_setup_slacks() routine. + bool update_enabled = true; + + ///@brief Flag that checks if setup slacks need to be recomputed for all connections.
+ bool recompute_required = true; }; -/* Usage - * ===== - * PlacerTimingCosts mimics a 2D array of connection timing costs running from: - * [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] +/** + * @brief PlacerTimingCosts mimics a 2D array of connection timing costs running from: + * [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1]. * - * So it can be used similar to: + * It can be used similar to: * * PlacerTimingCosts connection_timing_costs(cluster_ctx.clb_nlist); //Construct * @@ -163,53 +249,53 @@ class PlacerSetupSlacks { * * //Potentially other modifications... * - * //Calculate the updated timing cost, of all connections, incrementally based - * //on modifications + * //Calculate the updated timing cost, of all connections, + * //incrementally based on modifications * float total_timing_cost = connection_timing_costs.total_cost(); - * + * * However behind the scenes PlacerTimingCosts tracks when connection costs are modified, * and efficiently re-calculates the total timing cost incrementally based on the connections * which have had their cost modified. * - * Implementaion - * ============= - * Internally, PlacerTimingCosts stores all connection costs in a flat array in the last part + * Implementation + * ============== + * Internally, PlacerTimingCosts stores all connection costs in a flat array in the last part * of connection_costs_. To mimic 2d-array like access PlacerTimingCosts also uses two proxy * classes which allow indexing in the net and pin dimensions (NetProxy and ConnectionProxy * respectively). * * The first part of connection_costs_ stores intermediate sums of the connection costs for - * efficient incremental re-calculation. More concretely, connection_costs_ stores a binary + * efficient incremental re-calculation. More concretely, connection_costs_ stores a binary * tree, where leaves correspond to individual connection costs and intermediate nodes the - * partial sums of the connection costs. 
(The binary tree is stored implicitly in the - * connection_costs_ vector, using Eytzinger's/BFS layout.) By summing the entire binary + * partial sums of the connection costs. (The binary tree is stored implicitly in the + * connection_costs_ vector, using Eytzinger's/BFS layout.) By summing the entire binary * tree we calculate the total timing cost over all connections. * * Using a binary tree allows us to efficiently re-calculate the timing costs when only a subset * of connections are changed. This is done by 'invalidating' intermediate nodes (from leaves up - * to the root) which have ancestors (leaves) with modified connection costs. When the + * to the root) which have ancestors (leaves) with modified connection costs. When the * total_cost() method is called, it recursively walks the binary tree to re-calculate the cost. - * Only invalidated nodes are traversed, with valid nodes just returning their previously + * Only invalidated nodes are traversed, with valid nodes just returning their previously * calculated (and unchanged) value. * - * For a circuit with 'K' connections, of which 'k' have changed (typically k << K), this can + * For a circuit with 'K' connections, of which 'k' have changed (typically k << K), this can * be done in O(k log K) time. * - * It is important to note that due to limited floating point precision, floating point + * It is important to note that due to limited floating point precision, floating point * arithmetic has an order dependence (due to round-off). Using a binary tree to total * the timing connection costs allows us to incrementally update the total timign cost while - * maintianing the *same order of operations* as if it was re-computed from scratch. This + * maintianing the *same order of operations* as if it was re-computed from scratch. This * ensures we *always* get consistent results regardless of what/when connections are changed. 
* * Proxy Classes - * ------------- + * ============= * NetProxy is returned by PlacerTimingCost's operator[], and stores a pointer to the start of * internal storage of that net's connection costs. * - * ConnectionProxy is returnd by NetProxy's operator[], and holds a reference to a particular - * element of the internal storage pertaining to a specific connection's cost. ConnectionProxy - * supports assignment, allowing clients to modify the connection cost. It also detects if the - * assigned value differs from the previous value and if so, calls PlacerTimingCosts's + * ConnectionProxy is returnd by NetProxy's operator[], and holds a reference to a particular + * element of the internal storage pertaining to a specific connection's cost. ConnectionProxy + * supports assignment, allowing clients to modify the connection cost. It also detects if the + * assigned value differs from the previous value and if so, calls PlacerTimingCosts's * invalidate() method on that connection cost. * * PlacerTimingCosts's invalidate() method marks the cost element's ancestors as invalid (NaN) @@ -257,7 +343,9 @@ class PlacerTimingCosts { size_t num_level_before_leaves = num_nodes_in_level(ilevel - 1); VTR_ASSERT_MSG(num_leaves >= num_connections, "Need at least as many leaves as connections"); - VTR_ASSERT_MSG(num_connections == 0 || num_level_before_leaves < num_connections, "Level before should have fewer nodes than connections (to ensure using the smallest binary tree)"); + VTR_ASSERT_MSG( + num_connections == 0 || num_level_before_leaves < num_connections, + "Level before should have fewer nodes than connections (to ensure using the smallest binary tree)"); //We don't need to store all possible leaves if we have fewer connections //(i.e. 
bottom-right of tree is empty) @@ -277,16 +365,19 @@ class PlacerTimingCosts { } } - //Proxy class representing a connection cost - // Supports modification of connection cost while detecting changes and - // reporting them up to PlacerTimingCosts + /** + * @brief Proxy class representing a connection cost. + * + * Supports modification of connection cost while detecting + * changes and reporting them up to PlacerTimingCosts. + */ class ConnectionProxy { public: ConnectionProxy(PlacerTimingCosts* timing_costs, double& connection_cost) : timing_costs_(timing_costs) , connection_cost_(connection_cost) {} - //Allow clients to modify the connection cost via assignment + ///@brief Allow clients to modify the connection cost via assignment. ConnectionProxy& operator=(double new_cost) { if (new_cost != connection_cost_) { //If connection cost changed, update it, and mark it @@ -297,9 +388,11 @@ class PlacerTimingCosts { return *this; } - //Support getting the current connection cost as a double - // Useful for client code operating on the cost values (e.g. - // difference between costs) + /** + * @brief Support getting the current connection cost as a double. + * + * Useful for client code operating on the cost values (e.g. difference between costs). + */ operator double() { return connection_cost_; } @@ -309,15 +402,18 @@ class PlacerTimingCosts { double& connection_cost_; }; - //Proxy class representing the connection costs of a net - // Supports indexing by pin index to retrieve the ConnectionProxy for that pin/connection + /** + * @brief Proxy class representing the connection costs of a net. + * + * Supports indexing by pin index to retrieve the ConnectionProxy for that pin/connection. + */ class NetProxy { public: NetProxy(PlacerTimingCosts* timing_costs, double* net_sink_costs) : timing_costs_(timing_costs) , net_sink_costs_(net_sink_costs) {} - //Indexes into the specific net pin/connection + ///@brief Indexes into the specific net pin/connection. 
ConnectionProxy operator[](size_t ipin) { return ConnectionProxy(timing_costs_, net_sink_costs_[ipin]); } @@ -327,7 +423,7 @@ class PlacerTimingCosts { double* net_sink_costs_; }; - //Indexes into the specific net + ///@brief Indexes into the specific net. NetProxy operator[](ClusterNetId net_id) { VTR_ASSERT_SAFE(net_start_indicies_[net_id] >= 0); @@ -346,8 +442,10 @@ class PlacerTimingCosts { std::swap(num_levels_, other.num_levels_); } - //Calculates the total cost of all connections efficiently - //in the face of modified connection costs + /** + * @brief Calculates the total cost of all connections efficiently + * in the face of modified connection costs. + */ double total_cost() { float cost = total_cost_recurr(0); //Root @@ -358,7 +456,7 @@ class PlacerTimingCosts { } private: - //Recursively calculate and update the timing cost rooted at inode + ///@brief Recursively calculate and update the timing cost rooted at inode. double total_cost_recurr(size_t inode) { //Prune out-of-tree if (inode > connection_costs_.size() - 1) { @@ -393,12 +491,18 @@ class PlacerTimingCosts { return node_cost; } - friend ConnectionProxy; //So it can call invalidate() + ///@brief Friend-ed so it can call invalidate(). 
+ friend ConnectionProxy; void invalidate(double* invalidated_cost) { //Check pointer within range of internal storage - VTR_ASSERT_SAFE_MSG(invalidated_cost >= &connection_costs_[0], "Connection cost pointer should be after start of internal storage"); - VTR_ASSERT_SAFE_MSG(invalidated_cost <= &connection_costs_[connection_costs_.size() - 1], "Connection cost pointer should be before end of internal storage"); + VTR_ASSERT_SAFE_MSG( + invalidated_cost >= &connection_costs_[0], + "Connection cost pointer should be after start of internal storage"); + + VTR_ASSERT_SAFE_MSG( + invalidated_cost <= &connection_costs_[connection_costs_.size() - 1], + "Connection cost pointer should be before end of internal storage"); size_t icost = invalidated_cost - &connection_costs_[0]; @@ -407,7 +511,7 @@ class PlacerTimingCosts { //Invalidate parent intermediate costs up to root or first //already-invalidated parent size_t iparent = parent(icost); - ; + while (!std::isnan(connection_costs_[iparent])) { //Invalidate connection_costs_[iparent] = std::numeric_limits::quiet_NaN(); @@ -435,33 +539,41 @@ class PlacerTimingCosts { return (i - 1) / 2; } - //Returns the number of nodes in ilevel'th level - //If ilevel is negative, return 0, since the root shouldn't be counted - //as a leaf node candidate + /** + * @brief Returns the number of nodes in ilevel'th level. + * + * If ilevel is negative, return 0, since the root shouldn't + * be counted as a leaf node candidate. + */ size_t num_nodes_in_level(int ilevel) const { return ilevel < 0 ? 0 : (2 << (ilevel)); } - //Returns the total number of nodes in levels [0..ilevel] (inclusive) + ///@brief Returns the total number of nodes in levels [0..ilevel] (inclusive). size_t num_nodes_up_to_level(int ilevel) const { return (2 << (ilevel + 1)) - 1; } private: - //Vector storing the implicit binary tree of connection costs - // The actual connections are stored at the end of the vector - // (last level of the binary tree). 
The earlier portions of - // the tree are the intermediate nodes. - // - // The methods left_child()/right_child()/parent() can be used - // to traverse the tree by indicies into this vector + /** + * @brief Vector storing the implicit binary tree of connection costs. + * + * The actual connections are stored at the end of the vector + * (last level of the binary tree). The earlier portions of + * the tree are the intermediate nodes. + * + * The methods left_child()/right_child()/parent() can be used + * to traverse the tree by indicies into this vector. + */ std::vector connection_costs_; - //Vector storing the indicies of the first connection for - //each net in the netlist, used for indexing by net. + /** + * @brief Vector storing the indicies of the first connection + * for each net in the netlist, used for indexing by net. + */ vtr::vector net_start_indicies_; - //Number of levels in the binary tree + ///@brief Number of levels in the binary tree. size_t num_levels_ = 0; }; diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp index 5bff2ac8324..d1da2fbc164 100644 --- a/vpr/src/timing/timing_util.cpp +++ b/vpr/src/timing/timing_util.cpp @@ -564,10 +564,13 @@ std::map count_clock_fanouts(const tatum::TimingGraph& } /* - * Slack and criticality calculation utilities + * Criticalities and setup slacks calculation utilities */ -//Return the criticality of a net's pin in the CLB netlist +/** + * @brief Returns the criticality of a net's pin in the CLB netlist. + * Assumes that the timing graph is correct and up to date. 
+ */ float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, ClusterPinId clb_pin) { //There may be multiple atom netlist pins connected to this CLB pin float clb_pin_crit = 0.; @@ -579,18 +582,16 @@ float calculate_clb_net_pin_criticality(const SetupTimingInfo& timing_info, cons return clb_pin_crit; } -//Return the setup slack of a net's pin in the CLB netlist +/** + * @brief Returns the raw setup slack of a net's pin in the CLB netlist. + * Assumes that the timing graph is correct and up to date. + */ float calculate_clb_net_pin_setup_slack(const SetupTimingInfo& timing_info, const ClusteredPinAtomPinsLookup& pin_lookup, ClusterPinId clb_pin) { //There may be multiple atom netlist pins connected to this CLB pin - float clb_pin_setup_slack = std::numeric_limits::quiet_NaN(); - + float clb_pin_setup_slack = std::numeric_limits::infinity(); for (const auto atom_pin : pin_lookup.connected_atom_pins(clb_pin)) { - //Take the worst of the atom pin slacks as the CLB pin slack - if (std::isnan(clb_pin_setup_slack)) { - clb_pin_setup_slack = timing_info.setup_pin_slack(atom_pin); - } else { - clb_pin_setup_slack = std::min(clb_pin_setup_slack, timing_info.setup_pin_slack(atom_pin)); - } + //Take the worst/minimum of the atom pin slack as the CLB pin slack + clb_pin_setup_slack = std::min(clb_pin_setup_slack, timing_info.setup_pin_slack(atom_pin)); } return clb_pin_setup_slack; From df9db484ad150ed575266ac5f4f6293b04ae9740 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Tue, 1 Sep 2020 00:04:34 -0400 Subject: [PATCH 17/24] Removed quench metric option. 
--- vpr/src/base/SetupVPR.cpp | 2 -- vpr/src/base/read_options.cpp | 46 ----------------------------------- vpr/src/base/read_options.h | 1 - vpr/src/base/vpr_types.h | 7 ------ vpr/src/place/place.cpp | 26 +------------------- 5 files changed, 1 insertion(+), 81 deletions(-) diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 546fdc2f029..c10609e6857 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -570,8 +570,6 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts) PlacerOpts->effort_scaling = Options.place_effort_scaling; PlacerOpts->timing_update_type = Options.timing_update_type; - - PlacerOpts->place_quench_metric = Options.place_quench_metric; } static void SetupAnalysisOpts(const t_options& Options, t_analysis_opts& analysis_opts) { diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 76bd56d0126..b40e867f672 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -959,41 +959,6 @@ struct ParseTimingUpdateType { } }; -struct ParsePlaceQuenchMetric { - ConvertedValue from_str(std::string str) { - ConvertedValue conv_value; - if (str == "auto") - conv_value.set_value(e_place_quench_metric::AUTO); - else if (str == "timing_cost") - conv_value.set_value(e_place_quench_metric::TIMING_COST); - else if (str == "setup_slack") - conv_value.set_value(e_place_quench_metric::SETUP_SLACK); - else { - std::stringstream msg; - msg << "Invalid conversion from '" << str << "' to e_place_quench_metric (expected one of: " << argparse::join(default_choices(), ", ") << ")"; - conv_value.set_error(msg.str()); - } - return conv_value; - } - - ConvertedValue to_str(e_place_quench_metric val) { - ConvertedValue conv_value; - if (val == e_place_quench_metric::AUTO) - conv_value.set_value("auto"); - if (val == e_place_quench_metric::TIMING_COST) - conv_value.set_value("timing_cost"); - else { - VTR_ASSERT(val == e_place_quench_metric::SETUP_SLACK); - 
conv_value.set_value("setup_slack"); - } - return conv_value; - } - - std::vector default_choices() { - return {"auto", "timing_cost", "setup_slack"}; - } -}; - argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& args) { std::string description = "Implements the specified circuit onto the target FPGA architecture" @@ -1782,17 +1747,6 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg .default_value("") .show_in(argparse::ShowIn::HELP_ONLY); - place_timing_grp.add_argument(args.place_quench_metric, "--place_quench_metric") - .help( - "Controls which cost function the placer uses during the quench stage:\n" - " * auto: VPR decides\n" - " * timing_cost: The same cost formulation as the one used during\n" - " the annealing stage (more stable)\n" - " * setup_slack: Directly uses setup slacks (in combination with wiring)\n" - " to check if the block moves should be accepted\n") - .default_value("auto") - .show_in(argparse::ShowIn::HELP_ONLY); - auto& route_grp = parser.add_argument_group("routing options"); route_grp.add_argument(args.max_router_iterations, "--max_router_iterations") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 5964904072a..e3e1307823e 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -130,7 +130,6 @@ struct t_options { argparse::ArgValue place_delay_model; argparse::ArgValue place_delay_model_reducer; argparse::ArgValue allowed_tiles_for_delay_model; - argparse::ArgValue place_quench_metric; /* Router Options */ argparse::ArgValue check_rr_graph; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 9019dacba91..34f08d250f6 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -885,12 +885,6 @@ enum class e_place_delta_delay_algorithm { DIJKSTRA_EXPANSION, }; -enum class e_place_quench_metric { - TIMING_COST, - SETUP_SLACK, - AUTO -}; - struct t_placer_opts { enum e_place_algorithm place_algorithm; 
float timing_tradeoff; @@ -939,7 +933,6 @@ struct t_placer_opts { std::string allowed_tiles_for_delay_model; e_place_delta_delay_algorithm place_delta_delay_matrix_calculation_method; - e_place_quench_metric place_quench_metric; }; /* All the parameters controlling the router's operation are in this * diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 7fb89b70b15..17f95b17326 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -534,8 +534,6 @@ static void print_resources_utilization(); static void init_annealing_state(t_annealing_state* state, const t_annealing_sched& annealing_sched, float t, float rlim, int move_lim_max, float crit_exponent); -static e_place_algorithm get_placement_quench_algorithm(const t_placer_opts& placer_opts); - /*****************************************************************************/ void try_place(const t_placer_opts& placer_opts, t_annealing_sched annealing_sched, @@ -880,11 +878,6 @@ void try_place(const t_placer_opts& placer_opts, state.t = 0; /* freeze out */ - //Use setup slack analysis if the placer is timing driven - //and the quench metric is SETUP_SLACK. 
Otherwise, use the - //same cost formulation as the annealing stage - auto quench_algorithm = get_placement_quench_algorithm(placer_opts); - /* Run inner loop again with temperature = 0 so as to accept only swaps * which reduce the cost of the placement */ placement_inner_loop(state.t, num_temps, state.rlim, placer_opts, @@ -898,7 +891,7 @@ void try_place(const t_placer_opts& placer_opts, *move_generator, blocks_affected, timing_info.get(), - quench_algorithm); + placer_opts.place_algorithm); tot_iter += move_lim; ++num_temps; @@ -3361,23 +3354,6 @@ static void init_annealing_state(t_annealing_state* state, state->crit_exponent = crit_exponent; } -static e_place_algorithm get_placement_quench_algorithm(const t_placer_opts& placer_opts) { - e_place_algorithm place_algo = placer_opts.place_algorithm; - e_place_quench_metric quench_metric = placer_opts.place_quench_metric; - - if (place_algo == e_place_algorithm::PATH_TIMING_DRIVEN_PLACE) { - if (quench_metric == e_place_quench_metric::AUTO || quench_metric == e_place_quench_metric::TIMING_COST) { - return PATH_TIMING_DRIVEN_PLACE; - } else { - VTR_ASSERT(quench_metric == e_place_quench_metric::SETUP_SLACK); - return SETUP_SLACK_ANALYSIS_PLACE; - } - } else { - VTR_ASSERT(place_algo == e_place_algorithm::BOUNDING_BOX_PLACE); - return BOUNDING_BOX_PLACE; - } -} - bool placer_needs_lookahead(const t_vpr_setup& vpr_setup) { return (vpr_setup.PlacerOpts.place_algorithm == PATH_TIMING_DRIVEN_PLACE); } From 9b01e1fa350a1b1c8ea294d1a82b436e17f40079 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Tue, 1 Sep 2020 00:51:45 -0400 Subject: [PATCH 18/24] Changed PATH_TIMING_DRIVEN_PLACE to CRITICALITY_TIMING_PLACE, and SETUP_SLACK_ANALYSIS to SLACK_TIMING_PLACE. Updated documentation. 
--- doc/src/vpr/command_line_usage.rst | 26 ++++++++---------- vpr/src/base/CheckSetup.cpp | 2 +- vpr/src/base/ShowSetup.cpp | 9 ++++-- vpr/src/base/read_options.cpp | 30 +++++++++++--------- vpr/src/base/vpr_types.h | 4 +-- vpr/src/place/place.cpp | 44 +++++++++++++++--------------- 6 files changed, 60 insertions(+), 55 deletions(-) diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst index e462f1b42a3..7915129c99f 100644 --- a/doc/src/vpr/command_line_usage.rst +++ b/doc/src/vpr/command_line_usage.rst @@ -700,16 +700,24 @@ If any of init_t, exit_t or alpha_t is specified, the user schedule, with a fixe **Default:** ``not_locked``. -.. option:: --place_algorithm {bounding_box | path_timing_driven} +.. option:: --place_algorithm {bounding_box | criticality_timing | slack_timing} Controls the algorithm used by the placer. - ``bounding_box`` focuses purely on minimizing the bounding box wirelength of the circuit. + ``bounding_box`` Focuses purely on minimizing the bounding box wirelength of the circuit. Turns off timing analysis if specified. - ``path_timing_driven`` focuses on minimizing both wirelength and the critical path delay. + ``criticality_timing`` Focuses on minimizing both the wirelength and the connection timing costs (criticality * delay). + ``slack_timing`` Focuses on improving the circuit slack values to reduce critical path delay. - **Default:** ``path_timing_driven`` + **Default:** ``criticality_timing`` + +.. option:: --place_quench_algorithm {bounding_box | criticality_timing | slack_timing} + + Controls the algorithm used by the placer during placement quench. + The algorithm options have identical functionality as the ones used by the option ``--place_algorithm``. + + **Default:** ``criticality_timing`` .. 
option:: --place_chan_width @@ -862,16 +870,6 @@ The following options are only valid when the placement engine is in timing-driv Name of the post-placement timing report file to generate (not generated if unspecfied). -.. option:: --place_quench_metric {auto, timing_cost, setup_slack} - - Specifies which cost formulation the placer uses during the quench stage. - - * ``auto`` VPR makes the choice. Currently, VPR uses ``timing_cost`` by default. - * ``timing_cost`` Use the timing cost: connection delay * criticality. - * ``setup_slack`` Directly checks the raw setup slack returned by the timing analyzer. - - **Default:** ``auto`` - .. _router_options: Router Options diff --git a/vpr/src/base/CheckSetup.cpp b/vpr/src/base/CheckSetup.cpp index 1b7956e3a4a..fb722999c93 100644 --- a/vpr/src/base/CheckSetup.cpp +++ b/vpr/src/base/CheckSetup.cpp @@ -32,7 +32,7 @@ void CheckSetup(const t_packer_opts& PackerOpts, } if ((false == Timing.timing_analysis_enabled) - && (PlacerOpts.place_algorithm == PATH_TIMING_DRIVEN_PLACE)) { + && (PlacerOpts.place_algorithm == CRITICALITY_TIMING_PLACE)) { /* May work, not tested */ VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Timing analysis must be enabled for timing-driven placement.\n"); diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index e7aedc9e0fe..615fd32b63e 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -503,8 +503,11 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, case BOUNDING_BOX_PLACE: VTR_LOG("BOUNDING_BOX_PLACE\n"); break; - case PATH_TIMING_DRIVEN_PLACE: - VTR_LOG("PATH_TIMING_DRIVEN_PLACE\n"); + case CRITICALITY_TIMING_PLACE: + VTR_LOG("CRITICALITY_TIMING_PLACE\n"); + break; + case SLACK_TIMING_PLACE: + VTR_LOG("SLACK_TIMING_PLACE\n"); break; default: VTR_LOG_ERROR("Unknown placement algorithm\n"); @@ -538,7 +541,7 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, VTR_LOG("PlacerOpts.place_chan_width: %d\n", PlacerOpts.place_chan_width); - if 
(PATH_TIMING_DRIVEN_PLACE == PlacerOpts.place_algorithm) { + if (CRITICALITY_TIMING_PLACE == PlacerOpts.place_algorithm) { VTR_LOG("PlacerOpts.inner_loop_recompute_divider: %d\n", PlacerOpts.inner_loop_recompute_divider); VTR_LOG("PlacerOpts.recompute_crit_iter: %d\n", PlacerOpts.recompute_crit_iter); VTR_LOG("PlacerOpts.timing_tradeoff: %f\n", PlacerOpts.timing_tradeoff); diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index b40e867f672..71c724d55fc 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -315,13 +315,15 @@ struct ParsePlaceDeltaDelayAlgorithm { struct ParsePlaceAlgorithm { ConvertedValue from_str(std::string str) { ConvertedValue conv_value; - if (str == "bounding_box") + if (str == "bounding_box") { conv_value.set_value(BOUNDING_BOX_PLACE); - else if (str == "path_timing_driven") - conv_value.set_value(PATH_TIMING_DRIVEN_PLACE); - else { + } else if (str == "criticality_timing") { + conv_value.set_value(CRITICALITY_TIMING_PLACE); + } else if (str == "slack_timing") { + conv_value.set_value(SLACK_TIMING_PLACE); + } else { std::stringstream msg; - msg << "Invalid conversion from '" << str << "' to e_router_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")"; + msg << "Invalid conversion from '" << str << "' to e_place_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")"; conv_value.set_error(msg.str()); } return conv_value; @@ -329,17 +331,19 @@ struct ParsePlaceAlgorithm { ConvertedValue to_str(e_place_algorithm val) { ConvertedValue conv_value; - if (val == BOUNDING_BOX_PLACE) + if (val == BOUNDING_BOX_PLACE) { conv_value.set_value("bounding_box"); - else { - VTR_ASSERT(val == PATH_TIMING_DRIVEN_PLACE); - conv_value.set_value("path_timing_driven"); + } else if (val == CRITICALITY_TIMING_PLACE) { + conv_value.set_value("criticality_timing"); + } else { + VTR_ASSERT(val == SLACK_TIMING_PLACE); + conv_value.set_value("slack_timing"); } return 
conv_value; } std::vector default_choices() { - return {"bounding_box", "path_timing_driven"}; + return {"bounding_box", "criticality_timing", "slack_timing"}; } }; @@ -1613,8 +1617,8 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg place_grp.add_argument(args.PlaceAlgorithm, "--place_algorithm") .help("Controls which placement algorithm is used") - .default_value("path_timing_driven") - .choices({"bounding_box", "path_timing_driven"}) + .default_value("criticality_timing") + .choices({"bounding_box", "criticality_timing", "slack_timing"}) .show_in(argparse::ShowIn::HELP_ONLY); place_grp.add_argument(args.PlaceChanWidth, "--place_chan_width") @@ -2217,7 +2221,7 @@ void set_conditional_defaults(t_options& args) { //Which placement algorithm to use? if (args.PlaceAlgorithm.provenance() != Provenance::SPECIFIED) { if (args.timing_analysis) { - args.PlaceAlgorithm.set(PATH_TIMING_DRIVEN_PLACE, Provenance::INFERRED); + args.PlaceAlgorithm.set(CRITICALITY_TIMING_PLACE, Provenance::INFERRED); } else { args.PlaceAlgorithm.set(BOUNDING_BOX_PLACE, Provenance::INFERRED); } diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 34f08d250f6..e8d4f08eaa9 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -851,8 +851,8 @@ struct t_annealing_sched { * doPlacement: true if placement is supposed to be done in the CAD flow, false otherwise */ enum e_place_algorithm { BOUNDING_BOX_PLACE, - PATH_TIMING_DRIVEN_PLACE, - SETUP_SLACK_ANALYSIS_PLACE + CRITICALITY_TIMING_PLACE, + SLACK_TIMING_PLACE }; enum e_place_effort_scaling { diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 17f95b17326..e67311f3a74 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -589,7 +589,7 @@ void try_place(const t_placer_opts& placer_opts, num_swap_aborted = 0; num_ts_called = 0; - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm == 
CRITICALITY_TIMING_PLACE) { /*do this before the initial placement to avoid messing up the initial placement */ place_delay_model = alloc_lookups_and_criticalities(chan_width_dist, placer_opts, router_opts, det_routing_arch, segment_inf, directs, num_directs); @@ -620,7 +620,7 @@ void try_place(const t_placer_opts& placer_opts, /* Gets initial cost and loads bounding boxes. */ - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { costs.bb_cost = comp_bb_cost(NORMAL); first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ @@ -696,7 +696,7 @@ void try_place(const t_placer_opts& placer_opts, //Initial pacement statistics VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost, costs.bb_cost, costs.timing_cost); - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { VTR_LOG("Initial placement estimated Critical Path Delay (CPD): %g ns\n", 1e9 * critical_path.delay()); VTR_LOG("Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n", @@ -802,7 +802,7 @@ void try_place(const t_placer_opts& placer_opts, /* Outer loop of the simulated annealing begins */ do { vtr::Timer temperature_timer; - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { costs.cost = 1; } @@ -836,7 +836,7 @@ void try_place(const t_placer_opts& placer_opts, ++num_temps; - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { critical_path = timing_info->least_slack_critical_path(); sTNS = timing_info->setup_total_negative_slack(); sWNS = timing_info->setup_worst_negative_slack(); @@ -898,7 +898,7 @@ void try_place(const t_placer_opts& placer_opts, calc_placer_stats(stats, success_rat, std_dev, costs, move_lim); - if 
(placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { critical_path = timing_info->least_slack_critical_path(); sTNS = timing_info->setup_total_negative_slack(); sWNS = timing_info->setup_worst_negative_slack(); @@ -935,7 +935,7 @@ void try_place(const t_placer_opts& placer_opts, VTR_LOG("Swaps called: %d\n", num_ts_called); report_aborted_moves(); - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { //Final timing estimate VTR_ASSERT(timing_info); perform_full_timing_update(state.crit_exponent, @@ -1009,7 +1009,7 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, PlacerSetupSlacks* setup_slacks, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info) { - if (placer_opts.place_algorithm != PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm != CRITICALITY_TIMING_PLACE) { return; } @@ -1238,7 +1238,7 @@ static void placement_inner_loop(float t, num_swap_rejected++; } - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { /* Do we want to re-timing analyze the circuit to get updated slack and criticality values? * We do this only once in a while, since it is expensive. 
*/ @@ -1303,7 +1303,7 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, } costs->bb_cost = new_bb_cost; - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { double new_timing_cost = 0.; comp_td_costs(delay_model, *criticalities, &new_timing_cost); if (fabs(new_timing_cost - costs->timing_cost) > costs->timing_cost * ERROR_TOL) { @@ -1424,7 +1424,7 @@ static bool update_annealing_state(t_annealing_state* state, // The idea is that as the range limit shrinks (indicating we are fine-tuning a more optimized placement) we can focus more on a smaller number of critical connections, which a higher crit_exponent achieves. update_rlim(&state->rlim, success_rat, device_ctx.grid); - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { state->crit_exponent = (1 - (state->rlim - FINAL_RLIM) * state->inverse_delta_rlim) * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first) + placer_opts.td_place_exp_first; @@ -1635,7 +1635,7 @@ static e_move_result try_swap(float t, std::vector sink_pins_affected; find_affected_sink_pins(blocks_affected, sink_pins_affected); - if (place_algorithm == SETUP_SLACK_ANALYSIS_PLACE) { + if (place_algorithm == SLACK_TIMING_PLACE) { //Invalidates timing of modified connections for incremental timing updates //This routine relies on comparing proposed_connection_delay and connection_delay invalidate_affected_connection_delays(sink_pins_affected, @@ -1666,7 +1666,7 @@ static e_move_result try_swap(float t, //TODO: calculate a weighted average of the slack cost and wiring cost delta_c = analyze_setup_slack_cost(setup_slacks); - } else if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + } else if (place_algorithm == CRITICALITY_TIMING_PLACE) { /*in this case we redefine delta_c as a combination of timing and bb. 
* *additionally, we normalize all values, therefore delta_c is in * *relation to 1*/ @@ -1685,7 +1685,7 @@ static e_move_result try_swap(float t, costs->cost += delta_c; costs->bb_cost += bb_delta_c; - if (place_algorithm == SETUP_SLACK_ANALYSIS_PLACE) { + if (place_algorithm == SLACK_TIMING_PLACE) { /* Update the timing driven cost as usual */ costs->timing_cost += timing_delta_c; @@ -1694,7 +1694,7 @@ static e_move_result try_swap(float t, commit_setup_slacks(setup_slacks); } - if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (place_algorithm == CRITICALITY_TIMING_PLACE) { costs->timing_cost += timing_delta_c; //Invalidates timing of modified connections for incremental timing updates @@ -1724,7 +1724,7 @@ static e_move_result try_swap(float t, /* Restore the place_ctx.block_locs data structures to their state before the move. */ revert_move_blocks(blocks_affected); - if (place_algorithm == SETUP_SLACK_ANALYSIS_PLACE) { + if (place_algorithm == SLACK_TIMING_PLACE) { //Revert the timing delays and costs to pre-update values //These routines must be called after reverting the block moves //TODO: make this process incremental @@ -1748,7 +1748,7 @@ static e_move_result try_swap(float t, "The current setup slacks should be identical to the values before the try swap timing info update."); } - if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (place_algorithm == CRITICALITY_TIMING_PLACE) { /* Unstage the values stored in proposed_* data structures */ revert_td_cost(blocks_affected); } @@ -1817,7 +1817,7 @@ static int find_affected_nets_and_update_costs(e_place_algorithm place_algorithm //once per net, not once per pin. 
update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin); - if (place_algorithm == PATH_TIMING_DRIVEN_PLACE || place_algorithm == SETUP_SLACK_ANALYSIS_PLACE) { + if (place_algorithm == CRITICALITY_TIMING_PLACE || place_algorithm == SLACK_TIMING_PLACE) { //Determine the change in timing costs if required update_td_delta_costs(delay_model, *criticalities, net_id, blk_pin, blocks_affected, timing_delta_c); } @@ -2440,7 +2440,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp, max_pins_per_clb = max(max_pins_per_clb, type.num_pins); } - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { /* Allocate structures associated with timing driven placement */ /* [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] */ connection_delay = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.f); @@ -2486,7 +2486,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp, /* Frees the major structures needed by the placer (and not needed * * elsewhere). 
*/ static void free_placement_structs(const t_placer_opts& placer_opts) { - if (placer_opts.place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { vtr::release_memory(connection_timing_cost); vtr::release_memory(connection_delay); vtr::release_memory(connection_setup_slack); @@ -3085,7 +3085,7 @@ static int check_placement_costs(const t_placer_costs& costs, error++; } - if (place_algorithm == PATH_TIMING_DRIVEN_PLACE) { + if (place_algorithm == CRITICALITY_TIMING_PLACE) { comp_td_costs(delay_model, *criticalities, &timing_cost_check); //VTR_LOG("timing_cost recomputed from scratch: %g\n", timing_cost_check); if (fabs(timing_cost_check - costs.timing_cost) > costs.timing_cost * ERROR_TOL) { @@ -3355,5 +3355,5 @@ static void init_annealing_state(t_annealing_state* state, } bool placer_needs_lookahead(const t_vpr_setup& vpr_setup) { - return (vpr_setup.PlacerOpts.place_algorithm == PATH_TIMING_DRIVEN_PLACE); + return (vpr_setup.PlacerOpts.place_algorithm == CRITICALITY_TIMING_PLACE); } From e0b70c4ae0cef863b0461cf89e2967b0dcded8ac Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Tue, 1 Sep 2020 02:05:17 -0400 Subject: [PATCH 19/24] Changed e_place_algorithm to t_place_algorithm, a wrapper class that provides more utility regarding place algorithms. --- vpr/src/base/CheckSetup.cpp | 2 +- vpr/src/base/ShowSetup.cpp | 4 +- vpr/src/base/vpr_types.h | 135 +++++++++++++++++++++++++++++------- vpr/src/place/place.cpp | 20 +++--- 4 files changed, 124 insertions(+), 37 deletions(-) diff --git a/vpr/src/base/CheckSetup.cpp b/vpr/src/base/CheckSetup.cpp index fb722999c93..5b708584714 100644 --- a/vpr/src/base/CheckSetup.cpp +++ b/vpr/src/base/CheckSetup.cpp @@ -23,7 +23,7 @@ void CheckSetup(const t_packer_opts& PackerOpts, } if ((GLOBAL == RouterOpts.route_type) - && (BOUNDING_BOX_PLACE != PlacerOpts.place_algorithm)) { + && (PlacerOpts.place_algorithm != BOUNDING_BOX_PLACE)) { /* Works, but very weird. 
Can't optimize timing well, since you're * not doing proper architecture delay modelling. */ VTR_LOG_WARN( diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index 615fd32b63e..d9c36d0bab5 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -499,7 +499,7 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, if ((PLACE_ONCE == PlacerOpts.place_freq) || (PLACE_ALWAYS == PlacerOpts.place_freq)) { VTR_LOG("PlacerOpts.place_algorithm: "); - switch (PlacerOpts.place_algorithm) { + switch (PlacerOpts.place_algorithm.get()) { case BOUNDING_BOX_PLACE: VTR_LOG("BOUNDING_BOX_PLACE\n"); break; @@ -541,7 +541,7 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, VTR_LOG("PlacerOpts.place_chan_width: %d\n", PlacerOpts.place_chan_width); - if (CRITICALITY_TIMING_PLACE == PlacerOpts.place_algorithm) { + if (PlacerOpts.place_algorithm == CRITICALITY_TIMING_PLACE) { VTR_LOG("PlacerOpts.inner_loop_recompute_divider: %d\n", PlacerOpts.inner_loop_recompute_divider); VTR_LOG("PlacerOpts.recompute_crit_iter: %d\n", PlacerOpts.recompute_crit_iter); VTR_LOG("PlacerOpts.timing_tradeoff: %f\n", PlacerOpts.timing_tradeoff); diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index e8d4f08eaa9..ff777f1dada 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -826,35 +826,79 @@ struct t_annealing_sched { float success_target; }; -/* Various options for the placer. * - * place_algorithm: BOUNDING_BOX_PLACE or PATH_TIMING_DRIVEN_PLACE * - * timing_tradeoff: When TIMING_DRIVEN_PLACE mode, what is the tradeoff * - * timing driven and BOUNDING_BOX_PLACE. * - * place_cost_exp: Power to which denominator is raised for linear_cong. * - * place_chan_width: The channel width assumed if only one placement is * - * performed. * - * pad_loc_type: Are pins FREE, fixed randomly, or fixed from a file. * - * block_loc_type: Are blocks fixed from a file. 
* - * constraints_file: File to read block locations from if block_loc_type * - * is LOCKED. * - * pad_loc_file: File to read pad locations from if pad_loc_type is USER. * - * place_freq: Should the placement be skipped, done once, or done for each * - * channel width in the binary search. * - * recompute_crit_iter: how many temperature stages pass before we recompute * - * criticalities based on average point to point delay * - * inner_loop_crit_divider: (move_lim/inner_loop_crit_divider) determines how* - * many inner_loop iterations pass before a recompute of * - * criticalities is done. * - * td_place_exp_first: exponent that is used on the timing_driven criticlity * - * it is the value that the exponent starts at. * - * td_place_exp_last: value that the criticality exponent will be at the end * - * doPlacement: true if placement is supposed to be done in the CAD flow, false otherwise */ +/****************************************************************** + * Placer data types + *******************************************************************/ + +/** + * @brief Types of placement algorithms used in the placer. + * + * @param BOUNDING_BOX_PLACE + * Focuses purely on minimizing the bounding + * box wirelength of the circuit. + * @param CRITICALITY_TIMING_PLACE + * Focuses on minimizing both the wirelength and the + * connection timing costs (criticality * delay). + * @param SLACK_TIMING_PLACE + * Focuses on improving the circuit slack values + * to reduce critical path delay. + * + * The default is to use CRITICALITY_TIMING_PLACE. BOUNDING_BOX_PLACE + * is used when there is no timing information available (wiring only). + * SLACK_TIMING_PLACE is mainly feasible during placement quench. + */ enum e_place_algorithm { BOUNDING_BOX_PLACE, CRITICALITY_TIMING_PLACE, SLACK_TIMING_PLACE }; +/** + * @brief Provides a wrapper around enum e_place_algorithm. 
+ * + * Supports the method isTimingDriven(), which allows flexible updates + * to the placer algorithms if more timing driven placement strategies + * are added in the future. This method is used across various placement + * setup files, and it can be useful for major placer routines as well. + * + * More methods can be added to this class if the placement strategies + * will be further divided into more categories in the future. + * + * Also supports assignments and comparisons between t_place_algorithm + * and e_place_algorithm so as not to break existing code. + */ +class t_place_algorithm { + public: + //Constructors + t_place_algorithm() = default; + t_place_algorithm(e_place_algorithm _algo) + : algo(_algo) {} + ~t_place_algorithm() = default; + + //Assignment operators + void operator=(const t_place_algorithm& rhs) { algo = rhs.algo; } + void operator=(e_place_algorithm rhs) { algo = rhs; } + + //Equality operators + bool operator==(const t_place_algorithm& rhs) const { return algo == rhs.algo; } + bool operator==(e_place_algorithm rhs) const { return algo == rhs; } + bool operator!=(const t_place_algorithm& rhs) const { return algo != rhs.algo; } + bool operator!=(e_place_algorithm rhs) const { return algo != rhs; } + + ///@brief Check if the algorithm belongs to the timing driven category. + inline bool isTimingDriven() const { + return algo == CRITICALITY_TIMING_PLACE || algo == SLACK_TIMING_PLACE; + } + + ///@brief Accessor: returns the underlying e_place_algorithm enum value. + e_place_algorithm get() const { return algo; } + + private: + ///@brief The underlying algorithm. Default set to CRITICALITY_TIMING_PLACE. + e_place_algorithm algo = e_place_algorithm::CRITICALITY_TIMING_PLACE; +}; + +///@brief Used to calculate the inner placer loop's block swapping limit move_lim. enum e_place_effort_scaling { CIRCUIT, /// Date: Tue, 1 Sep 2020 02:25:34 -0400 Subject: [PATCH 20/24] Added --place_quench_algorithm option to VPR options.
Specifies the placement algorithm to be used during quench. --- vpr/src/base/SetupVPR.cpp | 1 + vpr/src/base/read_options.cpp | 11 +++++++++++ vpr/src/base/read_options.h | 1 + vpr/src/place/place.cpp | 4 ++-- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index c10609e6857..28367900663 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -530,6 +530,7 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts) PlacerOpts->td_place_exp_last = Options.place_exp_last; PlacerOpts->place_algorithm = Options.PlaceAlgorithm; + PlacerOpts->place_quench_algorithm = Options.PlaceQuenchAlgorithm; PlacerOpts->constraints_file = Options.constraints_file; PlacerOpts->pad_loc_file = Options.pad_loc_file; diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 71c724d55fc..25035f369c7 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -1621,6 +1621,12 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg .choices({"bounding_box", "criticality_timing", "slack_timing"}) .show_in(argparse::ShowIn::HELP_ONLY); + place_grp.add_argument(args.PlaceQuenchAlgorithm, "--place_quench_algorithm") + .help("Controls which placement algorithm is used during placement quench") + .default_value("criticality_timing") + .choices({"bounding_box", "criticality_timing", "slack_timing"}) + .show_in(argparse::ShowIn::HELP_ONLY); + place_grp.add_argument(args.PlaceChanWidth, "--place_chan_width") .help( "Sets the assumed channel width during placement. " @@ -2227,6 +2233,11 @@ void set_conditional_defaults(t_options& args) { } } + //Which placement algorithm to use during placement quench? 
+ if (args.PlaceQuenchAlgorithm.provenance() != Provenance::SPECIFIED) { + args.PlaceQuenchAlgorithm.set(args.PlaceAlgorithm, Provenance::INFERRED); + } + //Place chan width follows Route chan width if unspecified if (args.PlaceChanWidth.provenance() != Provenance::SPECIFIED && args.RouteChanWidth.provenance() == Provenance::SPECIFIED) { args.PlaceChanWidth.set(args.RouteChanWidth.value(), Provenance::INFERRED); diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index e3e1307823e..2f76a2a5360 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -105,6 +105,7 @@ struct t_options { argparse::ArgValue PlaceSuccessTarget; argparse::ArgValue anneal_sched_type; argparse::ArgValue PlaceAlgorithm; + argparse::ArgValue PlaceQuenchAlgorithm; argparse::ArgValue pad_loc_type; argparse::ArgValue block_loc_type; argparse::ArgValue PlaceChanWidth; diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 7ecd6bd129e..4b717205c3d 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -891,14 +891,14 @@ void try_place(const t_placer_opts& placer_opts, *move_generator, blocks_affected, timing_info.get(), - placer_opts.place_algorithm); + placer_opts.place_quench_algorithm); tot_iter += move_lim; ++num_temps; calc_placer_stats(stats, success_rat, std_dev, costs, move_lim); - if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { + if (placer_opts.place_quench_algorithm == CRITICALITY_TIMING_PLACE) { critical_path = timing_info->least_slack_critical_path(); sTNS = timing_info->setup_total_negative_slack(); sWNS = timing_info->setup_worst_negative_slack(); From dd2cfe24550edd7853df96965912cdd9371d2fdf Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Tue, 1 Sep 2020 03:23:05 -0400 Subject: [PATCH 21/24] Utilized the is_timing_driven() method provided by the t_place_algorithm class to efficiently include SLACK_TIMING_PLACE in all branchings related to CRITICALITY_TIMING_PLACE. 
--- vpr/src/base/CheckSetup.cpp | 4 ++-- vpr/src/base/ShowSetup.cpp | 2 +- vpr/src/base/vpr_types.h | 2 +- vpr/src/place/place.cpp | 32 ++++++++++++++++---------------- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/vpr/src/base/CheckSetup.cpp b/vpr/src/base/CheckSetup.cpp index 5b708584714..debbf9e486f 100644 --- a/vpr/src/base/CheckSetup.cpp +++ b/vpr/src/base/CheckSetup.cpp @@ -23,7 +23,7 @@ void CheckSetup(const t_packer_opts& PackerOpts, } if ((GLOBAL == RouterOpts.route_type) - && (PlacerOpts.place_algorithm != BOUNDING_BOX_PLACE)) { + && (PlacerOpts.place_algorithm.is_timing_driven())) { /* Works, but very weird. Can't optimize timing well, since you're * not doing proper architecture delay modelling. */ VTR_LOG_WARN( @@ -32,7 +32,7 @@ void CheckSetup(const t_packer_opts& PackerOpts, } if ((false == Timing.timing_analysis_enabled) - && (PlacerOpts.place_algorithm == CRITICALITY_TIMING_PLACE)) { + && (PlacerOpts.place_algorithm.is_timing_driven())) { /* May work, not tested */ VPR_FATAL_ERROR(VPR_ERROR_OTHER, "Timing analysis must be enabled for timing-driven placement.\n"); diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index d9c36d0bab5..93c014d6d1f 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -541,7 +541,7 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts, VTR_LOG("PlacerOpts.place_chan_width: %d\n", PlacerOpts.place_chan_width); - if (PlacerOpts.place_algorithm == CRITICALITY_TIMING_PLACE) { + if (PlacerOpts.place_algorithm.is_timing_driven()) { VTR_LOG("PlacerOpts.inner_loop_recompute_divider: %d\n", PlacerOpts.inner_loop_recompute_divider); VTR_LOG("PlacerOpts.recompute_crit_iter: %d\n", PlacerOpts.recompute_crit_iter); VTR_LOG("PlacerOpts.timing_tradeoff: %f\n", PlacerOpts.timing_tradeoff); diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index ff777f1dada..2c541414c2d 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -886,7 +886,7 @@ 
class t_place_algorithm { bool operator!=(e_place_algorithm rhs) const { return algo != rhs; } ///@brief Check if the algorithm belongs to the timing driven category. - inline bool isTimingDriven() const { + inline bool is_timing_driven() const { return algo == CRITICALITY_TIMING_PLACE || algo == SLACK_TIMING_PLACE; } diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 4b717205c3d..0b5b06489bd 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -589,7 +589,7 @@ void try_place(const t_placer_opts& placer_opts, num_swap_aborted = 0; num_ts_called = 0; - if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { /*do this before the initial placement to avoid messing up the initial placement */ place_delay_model = alloc_lookups_and_criticalities(chan_width_dist, placer_opts, router_opts, det_routing_arch, segment_inf, directs, num_directs); @@ -620,7 +620,7 @@ void try_place(const t_placer_opts& placer_opts, /* Gets initial cost and loads bounding boxes. 
*/ - if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { costs.bb_cost = comp_bb_cost(NORMAL); first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ @@ -696,7 +696,7 @@ void try_place(const t_placer_opts& placer_opts, //Initial pacement statistics VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost, costs.bb_cost, costs.timing_cost); - if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { VTR_LOG("Initial placement estimated Critical Path Delay (CPD): %g ns\n", 1e9 * critical_path.delay()); VTR_LOG("Initial placement estimated setup Total Negative Slack (sTNS): %g ns\n", @@ -802,7 +802,7 @@ void try_place(const t_placer_opts& placer_opts, /* Outer loop of the simulated annealing begins */ do { vtr::Timer temperature_timer; - if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { costs.cost = 1; } @@ -836,7 +836,7 @@ void try_place(const t_placer_opts& placer_opts, ++num_temps; - if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { critical_path = timing_info->least_slack_critical_path(); sTNS = timing_info->setup_total_negative_slack(); sWNS = timing_info->setup_worst_negative_slack(); @@ -898,7 +898,7 @@ void try_place(const t_placer_opts& placer_opts, calc_placer_stats(stats, success_rat, std_dev, costs, move_lim); - if (placer_opts.place_quench_algorithm == CRITICALITY_TIMING_PLACE) { + if (placer_opts.place_quench_algorithm.is_timing_driven()) { critical_path = timing_info->least_slack_critical_path(); sTNS = timing_info->setup_total_negative_slack(); sWNS = timing_info->setup_worst_negative_slack(); @@ -935,7 +935,7 @@ void try_place(const t_placer_opts& placer_opts, VTR_LOG("Swaps called: %d\n", num_ts_called); 
report_aborted_moves(); - if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { //Final timing estimate VTR_ASSERT(timing_info); perform_full_timing_update(state.crit_exponent, @@ -1009,7 +1009,7 @@ static void outer_loop_update_timing_info(const t_placer_opts& placer_opts, PlacerSetupSlacks* setup_slacks, ClusteredPinTimingInvalidator* pin_timing_invalidator, SetupTimingInfo* timing_info) { - if (placer_opts.place_algorithm != CRITICALITY_TIMING_PLACE) { + if (!placer_opts.place_algorithm.is_timing_driven()) { return; } @@ -1238,7 +1238,7 @@ static void placement_inner_loop(float t, num_swap_rejected++; } - if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { /* Do we want to re-timing analyze the circuit to get updated slack and criticality values? * We do this only once in a while, since it is expensive. */ @@ -1303,7 +1303,7 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, } costs->bb_cost = new_bb_cost; - if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { double new_timing_cost = 0.; comp_td_costs(delay_model, *criticalities, &new_timing_cost); if (fabs(new_timing_cost - costs->timing_cost) > costs->timing_cost * ERROR_TOL) { @@ -1424,7 +1424,7 @@ static bool update_annealing_state(t_annealing_state* state, // The idea is that as the range limit shrinks (indicating we are fine-tuning a more optimized placement) we can focus more on a smaller number of critical connections, which a higher crit_exponent achieves. 
update_rlim(&state->rlim, success_rat, device_ctx.grid); - if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { state->crit_exponent = (1 - (state->rlim - FINAL_RLIM) * state->inverse_delta_rlim) * (placer_opts.td_place_exp_last - placer_opts.td_place_exp_first) + placer_opts.td_place_exp_first; @@ -1817,7 +1817,7 @@ static int find_affected_nets_and_update_costs(const t_place_algorithm& place_al //once per net, not once per pin. update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin); - if (place_algorithm == CRITICALITY_TIMING_PLACE || place_algorithm == SLACK_TIMING_PLACE) { + if (place_algorithm.is_timing_driven()) { //Determine the change in timing costs if required update_td_delta_costs(delay_model, *criticalities, net_id, blk_pin, blocks_affected, timing_delta_c); } @@ -2440,7 +2440,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp, max_pins_per_clb = max(max_pins_per_clb, type.num_pins); } - if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { /* Allocate structures associated with timing driven placement */ /* [0..cluster_ctx.clb_nlist.nets().size()-1][1..num_pins-1] */ connection_delay = make_net_pins_matrix(cluster_ctx.clb_nlist, 0.f); @@ -2486,7 +2486,7 @@ static void alloc_and_load_placement_structs(float place_cost_exp, /* Frees the major structures needed by the placer (and not needed * * elsewhere). 
*/ static void free_placement_structs(const t_placer_opts& placer_opts) { - if (placer_opts.place_algorithm == CRITICALITY_TIMING_PLACE) { + if (placer_opts.place_algorithm.is_timing_driven()) { vtr::release_memory(connection_timing_cost); vtr::release_memory(connection_delay); vtr::release_memory(connection_setup_slack); @@ -3085,7 +3085,7 @@ static int check_placement_costs(const t_placer_costs& costs, error++; } - if (place_algorithm == CRITICALITY_TIMING_PLACE) { + if (place_algorithm.is_timing_driven()) { comp_td_costs(delay_model, *criticalities, &timing_cost_check); //VTR_LOG("timing_cost recomputed from scratch: %g\n", timing_cost_check); if (fabs(timing_cost_check - costs.timing_cost) > costs.timing_cost * ERROR_TOL) { @@ -3355,5 +3355,5 @@ static void init_annealing_state(t_annealing_state* state, } bool placer_needs_lookahead(const t_vpr_setup& vpr_setup) { - return (vpr_setup.PlacerOpts.place_algorithm == CRITICALITY_TIMING_PLACE); + return (vpr_setup.PlacerOpts.place_algorithm.is_timing_driven()); } From 23e87824207b1e0c926fb7fa93bc5e3318f99606 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Fri, 4 Sep 2020 06:20:53 -0400 Subject: [PATCH 22/24] Fixed slack cost routine bug and updated golden results. Added code documentation for options --place_algorithm and --place_quench_algorithm (same as the documentation on developer's page. 
--- vpr/src/base/read_options.cpp | 12 ++++++++++-- vpr/src/place/place.cpp | 8 ++++---- .../config/golden_results.txt | 2 +- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 0ddb040bec0..a5c18cfd7f4 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -1683,13 +1683,21 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg .show_in(argparse::ShowIn::HELP_ONLY); place_grp.add_argument(args.PlaceAlgorithm, "--place_algorithm") - .help("Controls which placement algorithm is used") + .help( + "Controls which placement algorithm is used. Valid options:\n" + " * bounding_box: Focuses purely on minimizing the bounding box wirelength of the circuit. Turns off timing analysis if specified.\n" + " * criticality_timing: Focuses on minimizing both the wirelength and the connection timing costs (criticality * delay).\n" + " * slack_timing: Focuses on improving the circuit slack values to reduce critical path delay.\n") .default_value("criticality_timing") .choices({"bounding_box", "criticality_timing", "slack_timing"}) .show_in(argparse::ShowIn::HELP_ONLY); place_grp.add_argument(args.PlaceQuenchAlgorithm, "--place_quench_algorithm") - .help("Controls which placement algorithm is used during placement quench") + .help( + "Controls which placement algorithm is used during placement quench. Valid options:\n" + " * bounding_box: Focuses purely on minimizing the bounding box wirelength of the circuit. 
Turns off timing analysis if specified.\n" + " * criticality_timing: Focuses on minimizing both the wirelength and the connection timing costs (criticality * delay).\n" + " * slack_timing: Focuses on improving the circuit slack values to reduce critical path delay.\n") .default_value("criticality_timing") .choices({"bounding_box", "criticality_timing", "slack_timing"}) .show_in(argparse::ShowIn::HELP_ONLY); diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 848e011310b..dd378a013fc 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1251,7 +1251,7 @@ static void placement_inner_loop(float t, num_swap_rejected++; } - if (placer_opts.place_algorithm.is_timing_driven()) { + if (place_algorithm.is_timing_driven()) { /* Do we want to re-timing analyze the circuit to get updated slack and criticality values? * We do this only once in a while, since it is expensive. */ @@ -1688,11 +1688,11 @@ static e_move_result try_swap(float t, /*in this case we redefine delta_c as a combination of timing and bb. 
* *additionally, we normalize all values, therefore delta_c is in * *relation to 1*/ - delta_c = (1 - timing_tradeoff) * bb_delta_c * prev_inverse_costs->bb_cost + timing_tradeoff * timing_delta_c * prev_inverse_costs->timing_cost; - } else { //place_algorithm == BOUNDING_BOX_PLACE (wiring cost) + } else { + VTR_ASSERT(place_algorithm == BOUNDING_BOX_PLACE); delta_c = bb_delta_c; } @@ -2010,7 +2010,7 @@ static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks) { //Check the first pair of slack values that are different //If found, return their difference for (size_t idiff = 0; idiff < original_setup_slacks.size(); ++idiff) { - float slack_diff = original_setup_slacks[idiff] != proposed_setup_slacks[idiff]; + float slack_diff = original_setup_slacks[idiff] - proposed_setup_slacks[idiff]; if (slack_diff != 0) { return slack_diff; diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_quench_slack/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_quench_slack/config/golden_results.txt index e531fc610c9..5053a6f6894 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_quench_slack/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_quench_slack/config/golden_results.txt @@ -1,2 +1,2 @@ arch circuit script_params vtr_flow_elapsed_time error odin_synth_time max_odin_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_time placed_wirelength_est place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est 
placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time -k6_N10_mem32K_40nm.xml stereovision3.v common 1.63 0.05 9248 4 0.11 -1 -1 33212 -1 -1 19 11 0 0 success v8.0.0-2369-gdd2cfe245-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-60-generic x86_64 2020-09-01T03:44:11 betzgrp-wintermute.eecg.utoronto.ca /home/hubingra/master/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_quench_slack/run002/k6_N10_mem32K_40nm.xml/stereovision3.v/common 29064 11 30 262 292 2 104 60 7 7 49 clb auto 0.05 395 0.24 0.13 2.22041 -166.35 -2.22041 2.11404 0.131181 0.10474 0.162837 0.12936 20 710 35 1.07788e+06 1.02399e+06 49980.0 1020.00 0.33 0.261832 0.208973 526 24 854 2107 65873 19424 2.36384 2.31477 -185.592 -2.36384 0 0 65453.8 1335.79 0.03 0.0178836 0.0152135 +k6_N10_mem32K_40nm.xml stereovision3.v common 2.19 0.07 9296 4 0.16 -1 -1 32824 -1 -1 19 11 0 0 success v8.0.0-2579-g270d1efd9-dirty release IPO VTR_ASSERT_LEVEL=2 GNU 7.5.0 on Linux-4.15.0-60-generic x86_64 2020-09-04T06:15:46 betzgrp-wintermute.eecg.utoronto.ca 
/home/hubingra/master/vtr-verilog-to-routing/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_place_quench_slack/run003/k6_N10_mem32K_40nm.xml/stereovision3.v/common 28964 11 30 262 292 2 104 60 7 7 49 clb auto 0.05 453 0.24 0.13 2.18141 -165.789 -2.18141 2.0954 0.12497 0.10019 0.156789 0.124805 26 608 25 1.07788e+06 1.02399e+06 65453.8 1335.79 0.27 0.252669 0.202403 608 25 973 2367 87670 24993 2.53264 2.50992 -189.166 -2.53264 0 0 80140.9 1635.53 0.03 0.0187426 0.0157532 From f6e809ee88c7386a980e4436575d2c8c3f8bed3f Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Sat, 5 Sep 2020 20:33:34 -0400 Subject: [PATCH 23/24] Code review update: enhance more documentations. Added function level description for three routines in place.cpp: find_affected_sink_pins(), commit_td_cost(), invalidate_affected_connection_delays(). --- doc/src/vpr/command_line_usage.rst | 2 +- vpr/src/base/read_options.cpp | 11 ++- vpr/src/base/vpr_types.h | 29 +++++--- vpr/src/place/place.cpp | 58 ++++++++++------ vpr/src/place/timing_place.cpp | 21 +++--- vpr/src/place/timing_place.h | 69 ++++++++++++++----- .../vtr_reg_strong/task_list.txt | 1 + 7 files changed, 131 insertions(+), 60 deletions(-) diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst index 7915129c99f..81bd3b03347 100644 --- a/doc/src/vpr/command_line_usage.rst +++ b/doc/src/vpr/command_line_usage.rst @@ -715,7 +715,7 @@ If any of init_t, exit_t or alpha_t is specified, the user schedule, with a fixe .. option:: --place_quench_algorithm {bounding_box | criticality_timing | slack_timing} Controls the algorithm used by the placer during placement quench. - The algorithm options have identical functionality as the ones used by the option ``--place_algorithm``. + The algorithm options have identical functionality as the ones used by the option ``--place_algorithm``. If specified, it overrides the option ``--place_algorithm`` during placement quench. 
**Default:** ``criticality_timing`` diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index a5c18cfd7f4..6c203a76582 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -359,6 +359,13 @@ struct ParsePlaceAlgorithm { } else { std::stringstream msg; msg << "Invalid conversion from '" << str << "' to e_place_algorithm (expected one of: " << argparse::join(default_choices(), ", ") << ")"; + + //Deprecated option: "path_timing_driven" -> PATH_DRIVEN_TIMING_PLACE + //New option: "criticality_timing" -> CRITICALITY_TIMING_PLACE + if (str == "path_timing_driven") { + msg << "\nDeprecated option: 'path_timing_driven'. It has been renamed to 'criticality_timing'"; + } + conv_value.set_error(msg.str()); } return conv_value; @@ -1694,7 +1701,9 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg place_grp.add_argument(args.PlaceQuenchAlgorithm, "--place_quench_algorithm") .help( - "Controls which placement algorithm is used during placement quench. Valid options:\n" + "Controls which placement algorithm is used during placement quench.\n" + "If specified, it overrides the option --place_algorithm during placement quench.\n" + "Valid options:\n" " * bounding_box: Focuses purely on minimizing the bounding box wirelength of the circuit. Turns off timing analysis if specified.\n" " * criticality_timing: Focuses on minimizing both the wirelength and the connection timing costs (criticality * delay).\n" " * slack_timing: Focuses on improving the circuit slack values to reduce critical path delay.\n") diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index fdf60ab40c3..ba3dc5a5382 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -859,7 +859,7 @@ enum e_place_algorithm { /** * @brief Provides a wrapper around enum e_place_algorithm. 
* - * Supports the method isTimingDriven(), which allows flexible updates + * Supports the method is_timing_driven(), which allows flexible updates * to the placer algorithms if more timing driven placement strategies * are added in tht future. This method is used across various placement * setup files, and it can be useful for major placer routines as well. @@ -879,8 +879,14 @@ class t_place_algorithm { ~t_place_algorithm() = default; //Assignment operators - void operator=(const t_place_algorithm& rhs) { algo = rhs.algo; } - void operator=(e_place_algorithm rhs) { algo = rhs; } + t_place_algorithm& operator=(const t_place_algorithm& rhs) { + algo = rhs.algo; + return *this; + } + t_place_algorithm& operator=(e_place_algorithm rhs) { + algo = rhs; + return *this; + } //Equality operators bool operator==(const t_place_algorithm& rhs) const { return algo == rhs.algo; } @@ -953,11 +959,10 @@ enum class e_place_delta_delay_algorithm { * @param place_chan_width * The channel width assumed if only one placement is performed. * @param pad_loc_type - * Are pins FREE, fixed randomly, or fixed from a file. - * @param block_loc_type - * Are blocks fixed from a file. + * Are pins FREE or fixed randomly. * @param constraints_file - * File to read block locations from if block_loc_type is LOCKED. + * File that specifies locations of locked down (constrained) + * blocks for placement. Empty string means no constraints file. * @param pad_loc_file * File to read pad locations from if pad_loc_type is USER. * @param place_freq @@ -965,16 +970,20 @@ enum class e_place_delta_delay_algorithm { * for each channel width in the binary search. * @param recompute_crit_iter * How many temperature stages pass before we recompute - * criticalities based on average point to point delay. + * criticalities based on the current placement and its + * estimated point-to-point delays. 
* @param inner_loop_crit_divider * (move_lim/inner_loop_crit_divider) determines how * many inner_loop iterations pass before a recompute * of criticalities is done. * @param td_place_exp_first * Exponent that is used in the CRITICALITY_TIMING_PLACE - * mode. It is the value that the crit_exponent starts at. + * mode to specify the initial value of `crit_exponent`. + * After we map the slacks to criticalities, this value + * is used to `sharpen` the criticalities, making connections + * with worse slacks more critical. * @param td_place_exp_last - * Value that the criticality exponent will be at the end. + * Value that the crit_exponent will be at the end. * @param doPlacement * True if placement is supposed to be done in the CAD flow. * False if otherwise. diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index dd378a013fc..53349441d29 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -1653,9 +1653,13 @@ static e_move_result try_swap(float t, std::vector sink_pins_affected; find_affected_sink_pins(blocks_affected, sink_pins_affected); + //For setup slack analysis, we first do a timing analysis to get the newest slack values + //resulted from the proposed block moves. If the move turns out to be accepted, we keep + //the updated slack values and commit the block moves. If rejected, we reject the proposed + //block moves and revert this timing analysis. if (place_algorithm == SLACK_TIMING_PLACE) { - //Invalidates timing of modified connections for incremental timing updates - //This routine relies on comparing proposed_connection_delay and connection_delay + //Gather all the connections with modified delays for incremental timing updates. + //This routine relies on comparing proposed_connection_delay and connection_delay. 
invalidate_affected_connection_delays(sink_pins_affected, pin_timing_invalidator, timing_info); @@ -1749,7 +1753,9 @@ static e_move_result try_swap(float t, comp_td_connection_delays(delay_model); comp_td_costs(delay_model, *criticalities, &costs->timing_cost); - //Re-invalidate the affected sink pins + //Re-invalidate the affected sink pins since the proposed move is + //rejected, and the same blocks are reverted to their original + //positions. The affected sink pins should stay the same. invalidate_affected_connection_delays(sink_pins_affected, pin_timing_invalidator, timing_info); @@ -1946,19 +1952,23 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, } } +/** + * @brief Find all the sink pins with changed connection delays from the affected blocks. + * + * These sink pins will be passed into the pin_timing_invalidator for timing update. + * They will also be added to the pin invalidator when we wish to revert a timing update. + * + * It is possible that some connections may not have changed delay. For instance, if + * using a dx/dy delay model, this could occur if a sink moved to a new position with + * the same dx/dy from it's driver. To minimize work during the incremental STA update + * we do not invalidate such unchanged connections. + */ static void find_affected_sink_pins(const t_pl_blocks_to_be_moved& blocks_affected, std::vector& sink_pins_affected) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& clb_nlist = cluster_ctx.clb_nlist; for (ClusterPinId clb_pin : blocks_affected.affected_pins) { - //It is possible that some connections may not have changed delay.(e.g. - //For instance, if using a dx/dy delay model, this could occur if a sink - //moved to a new position with the same dx/dy from it's driver. - // - //To minimize work during the incremental STA update we do not invalidate - //such unchanged connections. 
- ClusterNetId net = clb_nlist.pin_net(clb_pin); int ipin = clb_nlist.pin_net_index(clb_pin); @@ -2178,8 +2188,13 @@ static bool verify_connection_setup_slacks(const PlacerSetupSlacks* setup_slacks return true; } -/* Update the connection_timing_cost values from the temporary * - * values for all connections that have changed. */ +/** + * @brief Update the connection_timing_cost values from the temporary + * values for all connections that have/haven't changed. + * + * All the connections have already been gathered by blocks_affected.affected_pins + * after running the routine find_affected_nets_and_update_costs() in try_swap(). + */ static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& clb_nlist = cluster_ctx.clb_nlist; @@ -2217,10 +2232,15 @@ static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { #endif } -//Invalidates the delays of connections effected by the specified move -// -//Relies on proposed_connection_delay and connection_delay to detect -//which connections have actually had their delay changed. +/** + * @brief Invalidates the delays of connections effected by the specified move. + * + * Relies on find_affected_sink_pins() to find all the connections with different + * `proposed_connection_delay` and `connection_delay`. + * + * Invalidate all the timing graph edges associated with these sink pins via the + * ClusteredPinTimingInvalidator class. 
+ */ static void invalidate_affected_connection_delays(const std::vector& sink_pins_affected, ClusteredPinTimingInvalidator* pin_tedges_invalidator, TimingInfo* timing_info) { @@ -2229,12 +2249,6 @@ static void invalidate_affected_connection_delays(const std::vectorinvalidate_connection(clb_pin, timing_info); } } diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp index 0c06c134709..917ad860759 100644 --- a/vpr/src/place/timing_place.cpp +++ b/vpr/src/place/timing_place.cpp @@ -19,9 +19,6 @@ #include "timing_info.h" -///@brief Use an incremental approach to updating criticalities and setup slacks? -static constexpr bool INCR_UPDATE_CRITICALITIES = true, INCR_UPDATE_SETUP_SLACKS = true; - ///@brief Allocates space for the timing_place_crit_ data structure. PlacerCriticalities::PlacerCriticalities(const ClusteredNetlist& clb_nlist, const ClusteredPinAtomPinsLookup& netlist_pin_lookup) : clb_nlist_(clb_nlist) @@ -48,7 +45,7 @@ void PlacerCriticalities::update_criticalities(const SetupTimingInfo* timing_inf } /* Determine what pins need updating */ - if (!recompute_required && crit_exponent == last_crit_exponent_ && INCR_UPDATE_CRITICALITIES) { + if (!recompute_required && crit_exponent == last_crit_exponent_) { incr_update_criticalities(timing_info); } else { recompute_criticalities(); @@ -123,8 +120,11 @@ void PlacerCriticalities::recompute_criticalities() { } ///@brief Override the criticality of a particular connection. 
-void PlacerCriticalities::set_criticality(ClusterNetId net_id, int ipin, float val) { - timing_place_crit_[net_id][ipin] = val; +void PlacerCriticalities::set_criticality(ClusterNetId net_id, int ipin, float crit_val) { + VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin = 0)"); + VTR_ASSERT_SAFE_MSG(ipin < clb_nlist_.net_pins(net_id).size(), "The pin index in net should be smaller than fanout"); + + timing_place_crit_[net_id][ipin] = crit_val; } /** @@ -163,7 +163,7 @@ void PlacerSetupSlacks::update_setup_slacks(const SetupTimingInfo* timing_info) } /* Determine what pins need updating */ - if (!recompute_required && INCR_UPDATE_SETUP_SLACKS) { + if (!recompute_required) { incr_update_setup_slacks(timing_info); } else { recompute_setup_slacks(); @@ -223,8 +223,11 @@ void PlacerSetupSlacks::recompute_setup_slacks() { } ///@brief Override the setup slack of a particular connection. -void PlacerSetupSlacks::set_setup_slack(ClusterNetId net_id, int ipin, float val) { - timing_place_setup_slacks_[net_id][ipin] = val; +void PlacerSetupSlacks::set_setup_slack(ClusterNetId net_id, int ipin, float slack_val) { + VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin = 0)"); + VTR_ASSERT_SAFE_MSG(ipin < clb_nlist_.net_pins(net_id).size(), "The pin index in net should be smaller than fanout"); + + timing_place_setup_slacks_[net_id][ipin] = slack_val; } /** diff --git a/vpr/src/place/timing_place.h b/vpr/src/place/timing_place.h index 6bf6420a429..74996de4a5a 100644 --- a/vpr/src/place/timing_place.h +++ b/vpr/src/place/timing_place.h @@ -17,11 +17,18 @@ * (to avoid round-offs) while doing incremental updates. * * Calculating criticalities: - * All the raw setup slack values across a single clock domain are gathered, shifted, - * and rated from best to worst. The best shifted slack value (the most positive one) - * will have a criticality of 0, while the worse shifted slack value (always 0) - * will have a criticality of 1. 
Criticalities are used to calculated timing costs - * for each connection (delay * criticality). + * All the raw setup slack values across a single clock domain are gathered + * and rated from the best to the worst in terms of criticalities. In order + * to calculate criticalities, all the slack values need to be non-negative. + * Hence, if the worst slack is negative, all the slack values are shifted + * by the value of the worst slack so that the value is at least 0. If the + * worst slack is positive, then no shift happens. + * + * The best (shifted) slack (the most positive one) will have a criticality of 0. + * The worst (shifted) slack value will have a criticality of 1. + * + * Criticalities are used to calculate timing costs for each connection. + * The formula is cost = delay * criticality. * * For a more detailed description on how criticalities are calculated, see * calc_relaxed_criticality() in `timing_util.cpp`. @@ -63,9 +70,10 @@ std::unique_ptr alloc_lookups_and_criticalities(t_chan_width_di * * Therefore, if SetupTimingInfo is updated twice in succession without criticalities * getting updated (update_enabled = false), the returned set cannot account for all - * the connections that have been modified, in which case a recomputation is required. - * Hence, each time update_setup_slacks_and_criticalities() is called, we assign - * `recompute_required` the opposite value of `update_enabled`. + * the connections that have been modified. In this case, we flag `recompute_required` + * as true, and we recompute the criticalities for every connection to ensure that + * they are all up to date. Hence, each time update_setup_slacks_and_criticalities() + * is called, we assign `recompute_required` the opposite value of `update_enabled`. 
* * This class also maps/transforms the modified atom connections/pins returned by the * timing info into modified clustered netlist connections/pins after calling @@ -110,11 +118,16 @@ class PlacerCriticalities { /** * @brief Updates criticalities based on the atom netlist criticalitites * provided by timing_info and the provided criticality_exponent. + * + * Should consistently call this method after the most recent timing analysis to + * keep the criticalities stored in this class in sync with the timing analyzer. + * If out of sync, then the criticalities cannot be incrementally updated + * during the next timing analysis iteration. */ void update_criticalities(const SetupTimingInfo* timing_info, float criticality_exponent); ///@brief Override the criticality of a particular connection. - void set_criticality(ClusterNetId net, int ipin, float val); + void set_criticality(ClusterNetId net, int ipin, float crit_val); ///@brief Set `update_enabled` to true. void enable_update() { update_enabled = true; } @@ -145,14 +158,21 @@ class PlacerCriticalities { ///@brief Set of pins with criticaltites modified by last call to update_criticalities(). vtr::vec_id_set cluster_pins_with_modified_criticality_; - ///@brief Updates criticalities: incremental V.S. from scratch + ///@brief Incremental update. See timing_place.cpp for more. void incr_update_criticalities(const SetupTimingInfo* timing_info); + + ///@brief From scratch update. See timing_place.cpp for more. void recompute_criticalities(); ///@brief Flag that turns on/off the update_criticalities() routine. bool update_enabled = true; - ///@brief Flag that checks if criticalities needs to be recomputed for all connections. + /** + * @brief Flag that checks if criticalities need to be recomputed for all connections. + * + * Used by the method update_criticalities(). The incremental update is not possible + * if this method wasn't called after the previous timing info update. 
+ */ bool recompute_required = true; }; @@ -197,11 +217,19 @@ class PlacerSetupSlacks { pin_range pins_with_modified_setup_slack() const; public: //Modifiers - ///@brief Updates setup slacks based on the atom netlist setup slacks provided by timing_info. + /** + * @brief Updates setup slacks based on the atom netlist setup slacks provided + * by timing_info. + * + * Should consistently call this method after the most recent timing analysis to + * keep the setup slacks stored in this class in sync with the timing analyzer. + * If out of sync, then the setup slacks cannot be incrementally updated + * during the next timing analysis iteration. + */ void update_setup_slacks(const SetupTimingInfo* timing_info); ///@brief Override the setup slack of a particular connection. - void set_setup_slack(ClusterNetId net, int ipin, float val); + void set_setup_slack(ClusterNetId net, int ipin, float slack_val); ///@brief Set `update_enabled` to true. void enable_update() { update_enabled = true; } @@ -223,14 +251,21 @@ class PlacerSetupSlacks { ///@brief Set of pins with raw setup slacks modified by last call to update_setup_slacks() vtr::vec_id_set cluster_pins_with_modified_setup_slack_; - ///@brief Updates setup slacks: incremental V.S. from scratch. + ///@brief Incremental update. See timing_place.cpp for more. void incr_update_setup_slacks(const SetupTimingInfo* timing_info); + + ///@brief From scratch update. See timing_place.cpp for more. void recompute_setup_slacks(); ///@brief Flag that turns on/off the update_setup_slacks() routine. bool update_enabled = true; - ///@brief Flag that checks if setup slacks needs to be recomputed for all connections. + /** + * @brief Flag that checks if setup slacks need to be recomputed for all connections. + * + * Used by the method update_setup_slacks(). The incremental update is not possible + * if this method wasn't called after the previous timing info update. 
+ */ bool recompute_required = true; }; @@ -283,7 +318,7 @@ class PlacerSetupSlacks { * * It is important to note that due to limited floating point precision, floating point * arithmetic has an order dependence (due to round-off). Using a binary tree to total - * the timing connection costs allows us to incrementally update the total timign cost while + * the timing connection costs allows us to incrementally update the total timing cost while * maintianing the *same order of operations* as if it was re-computed from scratch. This * ensures we *always* get consistent results regardless of what/when connections are changed. * @@ -292,7 +327,7 @@ class PlacerSetupSlacks { * NetProxy is returned by PlacerTimingCost's operator[], and stores a pointer to the start of * internal storage of that net's connection costs. * - * ConnectionProxy is returnd by NetProxy's operator[], and holds a reference to a particular + * ConnectionProxy is returned by NetProxy's operator[], and holds a reference to a particular * element of the internal storage pertaining to a specific connection's cost. ConnectionProxy * supports assignment, allowing clients to modify the connection cost. 
It also detects if the * assigned value differs from the previous value and if so, calls PlacerTimingCosts's diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt index 5cf098b2f77..e59cabff1c1 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/task_list.txt @@ -70,3 +70,4 @@ regression_tests/vtr_reg_strong/strong_timing_update_type regression_tests/vtr_reg_strong/strong_timing_update_diff regression_tests/vtr_reg_strong/strong_blocks_with_no_inputs regression_tests/vtr_reg_strong/strong_fix_clusters +regression_tests/vtr_reg_strong/strong_place_quench_slack From e3d7d652154a92947ce7e3d48695cb26c0370f50 Mon Sep 17 00:00:00 2001 From: Bingran Hu Date: Tue, 8 Sep 2020 06:35:08 -0400 Subject: [PATCH 24/24] Removed find_affected_sink_pins() to eliminate the placer runtime increase. blocks_affected.affected_pins now only stores moved pins that have changed connection delays. 
--- vpr/src/place/place.cpp | 173 +++++++++++++++++---------------- vpr/src/place/timing_place.cpp | 8 +- 2 files changed, 91 insertions(+), 90 deletions(-) diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 53349441d29..664bd725463 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -164,8 +164,8 @@ static vtr::vector bb_updated_before; */ static ClbNetPinsMatrix connection_delay; //Delays based on committed block positions static ClbNetPinsMatrix proposed_connection_delay; //Delays for proposed block positions (only - // for connections effected by move, otherwise - // INVALID_DELAY) +// for connections effected by move, otherwise +// INVALID_DELAY) static ClbNetPinsMatrix connection_setup_slack; //Setup slacks based on most recently updated timing graph @@ -175,18 +175,18 @@ static ClbNetPinsMatrix connection_setup_slack; //Setup slacks based on m */ static PlacerTimingCosts connection_timing_cost; //Costs of committed block positions static ClbNetPinsMatrix proposed_connection_timing_cost; //Costs for proposed block positions - // (only for connection effected by - // move, otherwise INVALID_DELAY) +// (only for connection effected by +// move, otherwise INVALID_DELAY) /* * Timing cost of nets (i.e. sum of criticality * delay for each net sink/connection). * Index ranges: [0..cluster_ctx.clb_nlist.nets().size()-1] */ static vtr::vector net_timing_cost; //Like connection_timing_cost, but summed - // accross net pins. Used to allow more - // efficient recalculation of timing cost - // if only a sub-set of nets are changed - // while maintaining numeric stability. +// accross net pins. Used to allow more +// efficient recalculation of timing cost +// if only a sub-set of nets are changed +// while maintaining numeric stability. /* [0..cluster_ctx.clb_nlist.nets().size()-1]. 
Store the bounding box coordinates and the number of * * blocks on each of a net's bounding box (to allow efficient updates), * @@ -403,9 +403,9 @@ static void commit_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected); -static void invalidate_affected_connection_delays(const std::vector& sink_pins_affected, - ClusteredPinTimingInvalidator* pin_tedges_invalidator, - TimingInfo* timing_info); +static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, + ClusteredPinTimingInvalidator* pin_tedges_invalidator, + TimingInfo* timing_info); static bool driven_by_moved_block(const ClusterNetId net, const t_pl_blocks_to_be_moved& blocks_affected); @@ -417,9 +417,6 @@ static double comp_td_connection_cost(const PlaceDelayModel* delay_mode, const P static double sum_td_net_cost(ClusterNetId net); static double sum_td_costs(); -static void find_affected_sink_pins(const t_pl_blocks_to_be_moved& blocks_affected, - std::vector& sink_pins_affected); - static float analyze_setup_slack_cost(const PlacerSetupSlacks* setup_slacks); static e_move_result assess_swap(double delta_c, double t); @@ -690,9 +687,9 @@ void try_place(const t_placer_opts& placer_opts, prev_inverse_costs.timing_cost = 1 / costs.timing_cost; prev_inverse_costs.bb_cost = 1 / costs.bb_cost; costs.cost = 1; /*our new cost function uses normalized values of */ - /*bb_cost and timing_cost, the value of cost will be reset */ - /*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */ - } else { /*BOUNDING_BOX_PLACE */ + /*bb_cost and timing_cost, the value of cost will be reset */ + /*to 1 at each temperature when *_TIMING_DRIVEN_PLACE is true */ + } else { /*BOUNDING_BOX_PLACE */ costs.cost = costs.bb_cost = comp_bb_cost(NORMAL); costs.timing_cost = 0; outer_crit_iter_count = 0; @@ -1647,12 +1644,6 @@ static e_move_result try_swap(float t, bb_delta_c, timing_delta_c); - //Find all the sink pins with 
changed connection delays from the affected blocks. - //These sink pins will be passed into the pin_timing_invalidator for timing update. - //They will also be added to the pin invalidator when we wish to revert a timing update. - std::vector sink_pins_affected; - find_affected_sink_pins(blocks_affected, sink_pins_affected); - //For setup slack analysis, we first do a timing analysis to get the newest slack values //resulted from the proposed block moves. If the move turns out to be accepted, we keep //the updated slack values and commit the block moves. If rejected, we reject the proposed @@ -1660,9 +1651,9 @@ static e_move_result try_swap(float t, if (place_algorithm == SLACK_TIMING_PLACE) { //Gather all the connections with modified delays for incremental timing updates. //This routine relies on comparing proposed_connection_delay and connection_delay. - invalidate_affected_connection_delays(sink_pins_affected, - pin_timing_invalidator, - timing_info); + invalidate_affected_connections(blocks_affected, + pin_timing_invalidator, + timing_info); //Update the connection_timing_cost and connection_delay //values from the temporary values. @@ -1723,9 +1714,9 @@ static e_move_result try_swap(float t, //This routine relies on comparing proposed_connection_delay and connection_delay //If the setup slack analysis was not performed, the //sink pins are yet to be invalidated. - invalidate_affected_connection_delays(sink_pins_affected, - pin_timing_invalidator, - timing_info); + invalidate_affected_connections(blocks_affected, + pin_timing_invalidator, + timing_info); //update the connection_timing_cost and connection_delay //values from the temporary values @@ -1756,9 +1747,9 @@ static e_move_result try_swap(float t, //Re-invalidate the affected sink pins since the proposed move is //rejected, and the same blocks are reverted to their original //positions. The affected sink pins should stay the same. 
- invalidate_affected_connection_delays(sink_pins_affected, - pin_timing_invalidator, - timing_info); + invalidate_affected_connections(blocks_affected, + pin_timing_invalidator, + timing_info); /* Revert the timing update */ update_timing_classes(crit_exponent, @@ -1845,7 +1836,7 @@ static int find_affected_nets_and_update_costs(const t_place_algorithm& place_al update_net_bb(net_id, blocks_affected, iblk, blk, blk_pin); if (place_algorithm.is_timing_driven()) { - //Determine the change in timing costs if required + /* Determine the change in connection delay and timing cost */ update_td_delta_costs(delay_model, *criticalities, net_id, blk_pin, blocks_affected, timing_delta_c); } } @@ -1907,6 +1898,35 @@ static void update_net_bb(const ClusterNetId net, } } +/** + * @brief Calculate the new connection delay and timing cost of all the + * sink pins affected by moving a specific pin to a new location. + * Also calculates the total change in the timing cost. + * + * Assumes that the blocks have been moved to the proposed new locations. + * Otherwise, the routine comp_td_connection_delay() will not be able to + * calculate the most up to date connection delay estimation value. + * + * If the moved pin is a driver pin, then all the sink connections that are + * driven by this driver pin are considered. + * + * If the moved pin is a sink pin, then it is the only pin considered. But + * in some cases, the sink is already accounted for if it is also driven + * by a driver pin located on a moved block. Computing it again would double + * count its effect on the total timing cost change (delta_timing_cost). + * + * It is possible for some connections to have unchanged delays. For instance, + * if we are using a dx/dy delay model, this could occur if a sink pin moved + * to a new position with the same dx/dy from its net's driver pin. + * + * We skip these connections with unchanged delay values as their delay need + * not be updated. 
Their timing costs also do not require any update, since + * the criticality values are always kept stale/unchanged during a block + * swap attempt. (Unchanged Delay * Unchanged Criticality = Unchanged Cost) + * + * This is also done to minimize the number of timing node/edge invalidations + * for incremental static timing analysis (incremental STA). + */ static void update_td_delta_costs(const PlaceDelayModel* delay_model, const PlacerCriticalities& criticalities, const ClusterNetId net, @@ -1916,69 +1936,50 @@ static void update_td_delta_costs(const PlaceDelayModel* delay_model, auto& cluster_ctx = g_vpr_ctx.clustering(); if (cluster_ctx.clb_nlist.pin_type(pin) == PinType::DRIVER) { - //This pin is a net driver on a moved block. - //Re-compute all point to point connections for this net. + /* This pin is a net driver on a moved block. */ + /* Recompute all point to point connection delays for the net sinks. */ for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net).size(); ipin++) { float temp_delay = comp_td_connection_delay(delay_model, net, ipin); - proposed_connection_delay[net][ipin] = temp_delay; + /* If the delay hasn't changed, do not mark this pin as affected */ + if (temp_delay == connection_delay[net][ipin]) { + continue; + } + /* Calculate proposed delay and cost values */ + proposed_connection_delay[net][ipin] = temp_delay; proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * temp_delay; delta_timing_cost += proposed_connection_timing_cost[net][ipin] - connection_timing_cost[net][ipin]; + /* Record this connection in blocks_affected.affected_pins */ ClusterPinId sink_pin = cluster_ctx.clb_nlist.net_pin(net, ipin); blocks_affected.affected_pins.push_back(sink_pin); } } else { - //This pin is a net sink on a moved block + /* This pin is a net sink on a moved block */ VTR_ASSERT_SAFE(cluster_ctx.clb_nlist.pin_type(pin) == PinType::SINK); - //If this net is being driven by a moved block, we do not - //need to 
compute the change in the timing cost (here) since it will - //be computed by the net's driver pin (since the driver block moved). - // - //Computing it here would double count the change, and mess up the - //delta_timing_cost value. + /* Check if this sink's net is driven by a moved block */ if (!driven_by_moved_block(net, blocks_affected)) { - int net_pin = cluster_ctx.clb_nlist.pin_net_index(pin); + /* Get the sink pin index in the net */ + int ipin = cluster_ctx.clb_nlist.pin_net_index(pin); - float temp_delay = comp_td_connection_delay(delay_model, net, net_pin); - proposed_connection_delay[net][net_pin] = temp_delay; + float temp_delay = comp_td_connection_delay(delay_model, net, ipin); + /* If the delay hasn't changed, do not mark this pin as affected */ + if (temp_delay == connection_delay[net][ipin]) { + return; + } - proposed_connection_timing_cost[net][net_pin] = criticalities.criticality(net, net_pin) * temp_delay; - delta_timing_cost += proposed_connection_timing_cost[net][net_pin] - connection_timing_cost[net][net_pin]; + /* Calculate proposed delay and cost values */ + proposed_connection_delay[net][ipin] = temp_delay; + proposed_connection_timing_cost[net][ipin] = criticalities.criticality(net, ipin) * temp_delay; + delta_timing_cost += proposed_connection_timing_cost[net][ipin] - connection_timing_cost[net][ipin]; + /* Record this connection in blocks_affected.affected_pins */ blocks_affected.affected_pins.push_back(pin); } } } -/** - * @brief Find all the sink pins with changed connection delays from the affected blocks. - * - * These sink pins will be passed into the pin_timing_invalidator for timing update. - * They will also be added to the pin invalidator when we wish to revert a timing update. - * - * It is possible that some connections may not have changed delay. For instance, if - * using a dx/dy delay model, this could occur if a sink moved to a new position with - * the same dx/dy from it's driver. 
To minimize work during the incremental STA update - * we do not invalidate such unchanged connections. - */ -static void find_affected_sink_pins(const t_pl_blocks_to_be_moved& blocks_affected, - std::vector& sink_pins_affected) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& clb_nlist = cluster_ctx.clb_nlist; - - for (ClusterPinId clb_pin : blocks_affected.affected_pins) { - ClusterNetId net = clb_nlist.pin_net(clb_pin); - int ipin = clb_nlist.pin_net_index(clb_pin); - - if (proposed_connection_delay[net][ipin] != connection_delay[net][ipin]) { - //Delay has changed. Must invalidate this sink pin. - sink_pins_affected.push_back(clb_pin); - } - } -} - /** * @brief Check if the setup slack has gotten better or worse due to block swap. * @@ -2233,23 +2234,23 @@ static void revert_td_cost(const t_pl_blocks_to_be_moved& blocks_affected) { } /** - * @brief Invalidates the delays of connections effected by the specified move. + * @brief Invalidates the connections affected by the specified block moves. * - * Relies on find_affected_sink_pins() to find all the connections with different - * `proposed_connection_delay` and `connection_delay`. + * All the connections recorded in blocks_affected.affected_pins have different + * values for `proposed_connection_delay` and `connection_delay`. * - * Invalidate all the timing graph edges associated with these sink pins via the - * ClusteredPinTimingInvalidator class. + * Invalidate all the timing graph edges associated with these connections via + * the ClusteredPinTimingInvalidator class. 
*/ -static void invalidate_affected_connection_delays(const std::vector& sink_pins_affected, - ClusteredPinTimingInvalidator* pin_tedges_invalidator, - TimingInfo* timing_info) { +static void invalidate_affected_connections(const t_pl_blocks_to_be_moved& blocks_affected, + ClusteredPinTimingInvalidator* pin_tedges_invalidator, + TimingInfo* timing_info) { VTR_ASSERT_SAFE(timing_info); VTR_ASSERT_SAFE(pin_tedges_invalidator); - //Invalidate timing graph edges affected by the move - for (ClusterPinId clb_pin : sink_pins_affected) { - pin_tedges_invalidator->invalidate_connection(clb_pin, timing_info); + /* Invalidate timing graph edges affected by the move */ + for (ClusterPinId pin : blocks_affected.affected_pins) { + pin_tedges_invalidator->invalidate_connection(pin, timing_info); } } diff --git a/vpr/src/place/timing_place.cpp b/vpr/src/place/timing_place.cpp index 917ad860759..d4dfbcc6f52 100644 --- a/vpr/src/place/timing_place.cpp +++ b/vpr/src/place/timing_place.cpp @@ -121,8 +121,8 @@ void PlacerCriticalities::recompute_criticalities() { ///@brief Override the criticality of a particular connection. void PlacerCriticalities::set_criticality(ClusterNetId net_id, int ipin, float crit_val) { - VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin = 0)"); - VTR_ASSERT_SAFE_MSG(ipin < clb_nlist_.net_pins(net_id).size(), "The pin index in net should be smaller than fanout"); + VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin != 0)"); + VTR_ASSERT_SAFE_MSG(ipin < int(clb_nlist_.net_pins(net_id).size()), "The pin index in net should be smaller than fanout"); timing_place_crit_[net_id][ipin] = crit_val; } @@ -224,8 +224,8 @@ void PlacerSetupSlacks::recompute_setup_slacks() { ///@brief Override the setup slack of a particular connection. 
void PlacerSetupSlacks::set_setup_slack(ClusterNetId net_id, int ipin, float slack_val) { - VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin = 0)"); - VTR_ASSERT_SAFE_MSG(ipin < clb_nlist_.net_pins(net_id).size(), "The pin index in net should be smaller than fanout"); + VTR_ASSERT_SAFE_MSG(ipin > 0, "The pin should not be a driver pin (ipin != 0)"); + VTR_ASSERT_SAFE_MSG(ipin < int(clb_nlist_.net_pins(net_id).size()), "The pin index in net should be smaller than fanout"); timing_place_setup_slacks_[net_id][ipin] = slack_val; }