Skip to content

Commit 7b59537

Browse files
committed
vpr: Fix slow look-up of clb to atom pins mapping during placement
Previously this mapping was needlessly recalculated once per temperature in order to update clustered net criticalities. We now calculate this mapping once at the beginning of placement, and re-use it. This yields ~22% placement speed-up on the LU8 benchmark. Also update the timing driven router to use the faster look-up.
1 parent 517652e commit 7b59537

17 files changed

+193
-116
lines changed

libs/libvtrutil/src/vtr_vector.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#ifndef VTR_VECTOR
22
#define VTR_VECTOR
33
#include <vector>
4+
#include <cstddef>
45

56
namespace vtr {
67

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#include "clustered_netlist_utils.h"
2+
//Constructs the lookup by immediately populating it from the given clustered netlist
//and intra-block pin lookup (all of the work is delegated to init_lookup()).
ClusteredPinAtomPinsLookup::ClusteredPinAtomPinsLookup(const ClusteredNetlist& clustered_netlist, const IntraLbPbPinLookup& pb_gpin_lookup) {
3+
init_lookup(clustered_netlist, pb_gpin_lookup);
4+
}
5+
6+
//Returns a range (begin/end const iterators) over the atom pins stored for the
//specified clustered pin in the table filled in by init_lookup().
//No searching is performed here; the entry is read directly by indexing with clustered_pin.
ClusteredPinAtomPinsLookup::atom_pin_range ClusteredPinAtomPinsLookup::connected_atom_pins(ClusterPinId clustered_pin) const {
7+
//NOTE(review): presumably rejects an invalid/default pin id -- confirm against ClusterPinId's bool conversion
VTR_ASSERT(clustered_pin);
8+
//Alternative form (disabled): build the range directly from the container
//return vtr::make_range(clustered_pin_connected_atom_pins_[clustered_pin]);
9+
return vtr::make_range(clustered_pin_connected_atom_pins_[clustered_pin].begin(),
10+
clustered_pin_connected_atom_pins_[clustered_pin].end());
11+
//Alternative form (disabled): construct atom_pin_range explicitly from the iterator pair
//return atom_pin_range(clustered_pin_connected_atom_pins_[clustered_pin].begin(), clustered_pin_connected_atom_pins_[clustered_pin].end());
12+
}
13+
14+
//(Re)builds the clustered pin -> atom pins table.
//  clustered_netlist: netlist whose pins are iterated and whose pin_block()/pin_physical_index() are queried
//  pb_gpin_lookup: forwarded unchanged to find_clb_pin_connected_atom_pins()
//Any previous contents of the table are discarded first.
void ClusteredPinAtomPinsLookup::init_lookup(const ClusteredNetlist& clustered_netlist, const IntraLbPbPinLookup& pb_gpin_lookup) {
15+
auto clustered_pins = clustered_netlist.pins();
16+
//Drop stale entries, then size the table to one entry per pin in the netlist
clustered_pin_connected_atom_pins_.clear();
17+
clustered_pin_connected_atom_pins_.resize(clustered_pins.size());
18+
for (ClusterPinId clustered_pin : clustered_pins) {
19+
20+
//Identify the pin by its owning block and physical pin index, which is
//what find_clb_pin_connected_atom_pins() takes as input
auto clustered_block = clustered_netlist.pin_block(clustered_pin);
21+
int phys_pin_index = clustered_netlist.pin_physical_index(clustered_pin);
22+
//Store the result so later queries via connected_atom_pins() are a simple table read
clustered_pin_connected_atom_pins_[clustered_pin] = find_clb_pin_connected_atom_pins(clustered_block, phys_pin_index, pb_gpin_lookup);
23+
}
24+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
#ifndef CLUSTERED_NETLIST_UTILS_H
2+
#define CLUSTERED_NETLIST_UTILS_H
3+
4+
#include "vtr_vector.h"
5+
#include "vtr_range.h"
6+
7+
#include "vpr_utils.h"
8+
#include "atom_netlist_fwd.h"
9+
#include "clustered_netlist_fwd.h"
10+
11+
//Lookup table from a clustered netlist pin to a list of atom netlist pins.
//The table is populated once when the object is constructed and is afterwards
//only read through connected_atom_pins().
class ClusteredPinAtomPinsLookup {
12+
public:
13+
//Iterator and range types through which a pin's stored atom pins are exposed
typedef std::vector<AtomPinId>::const_iterator atom_pin_iterator;
14+
typedef typename vtr::Range<atom_pin_iterator> atom_pin_range;
15+
public:
16+
//Builds the lookup from the given clustered netlist and intra-block pin lookup
ClusteredPinAtomPinsLookup(const ClusteredNetlist& clustered_netlist, const IntraLbPbPinLookup& pb_gpin_lookup);
17+
18+
//Returns the range of atom pins stored for the specified clustered pin
atom_pin_range connected_atom_pins(ClusterPinId clustered_pin) const;
19+
20+
private:
21+
//Fills in clustered_pin_connected_atom_pins_ for every pin of clustered_netlist
void init_lookup(const ClusteredNetlist& clustered_netlist, const IntraLbPbPinLookup& pb_gpin_lookup);
22+
23+
private:
24+
//Stored atom pins for each clustered pin, indexed by ClusterPinId
vtr::vector<ClusterPinId,std::vector<AtomPinId>> clustered_pin_connected_atom_pins_;
25+
};
26+
27+
#endif

vpr/src/base/netlist.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,7 @@ class Netlist {
561561
//Returns the net associated with the specified pin
562562
NetId pin_net(const PinId pin_id) const;
563563

564+
//Returns the index of the specified pin within its connected net
564565
int pin_net_index(const PinId pin_id) const;
565566

566567
//Returns the port associated with the specified pin

vpr/src/place/place.cpp

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ static void outer_loop_recompute_criticalities(t_placer_opts placer_opts,
297297
float * place_delay_value, float * timing_cost, float * delay_cost,
298298
int * outer_crit_iter_count, float * inverse_prev_timing_cost,
299299
float * inverse_prev_bb_cost,
300-
const IntraLbPbPinLookup& pb_gpin_lookup,
300+
const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
301301
#ifdef ENABLE_CLASSIC_VPR_STA
302302
t_slack* slacks,
303303
t_timing_inf timing_inf,
@@ -313,7 +313,7 @@ static void placement_inner_loop(float t, float rlim, t_placer_opts placer_opts,
313313
t_slack* slacks,
314314
t_timing_inf timing_inf,
315315
#endif
316-
const IntraLbPbPinLookup& pb_gpin_lookup,
316+
const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
317317
SetupTimingInfo& timing_info);
318318

319319
/*****************************************************************************/
@@ -385,6 +385,8 @@ void try_place(t_placer_opts placer_opts,
385385
initial_placement(placer_opts.pad_loc_type, placer_opts.pad_loc_file.c_str());
386386
init_draw_coords((float) width_fac);
387387

388+
//Enables fast look-up of atom pins connected to CLB pins
389+
ClusteredPinAtomPinsLookup netlist_pin_lookup(cluster_ctx.clb_nlist, pb_gpin_lookup);
388390

389391
/* Gets initial cost and loads bounding boxes. */
390392

@@ -414,7 +416,7 @@ void try_place(t_placer_opts placer_opts,
414416
timing_info->set_warn_unconstrained(false); //Don't warn again about unconstrained nodes again during placement
415417

416418
//Initial slack estimates
417-
load_criticalities(*timing_info, crit_exponent, pb_gpin_lookup);
419+
load_criticalities(*timing_info, crit_exponent, netlist_pin_lookup);
418420

419421
critical_path = timing_info->least_slack_critical_path();
420422

@@ -572,7 +574,7 @@ void try_place(t_placer_opts placer_opts,
572574
outer_loop_recompute_criticalities(placer_opts, num_connections,
573575
crit_exponent, bb_cost, &place_delay_value, &timing_cost, &delay_cost,
574576
&outer_crit_iter_count, &inverse_prev_timing_cost, &inverse_prev_bb_cost,
575-
pb_gpin_lookup,
577+
netlist_pin_lookup,
576578
#ifdef ENABLE_CLASSIC_VPR_STA
577579
slacks,
578580
timing_inf,
@@ -586,7 +588,7 @@ void try_place(t_placer_opts placer_opts,
586588
slacks,
587589
timing_inf,
588590
#endif
589-
pb_gpin_lookup,
591+
netlist_pin_lookup,
590592
*timing_info);
591593

592594
/* Lines below prevent too much round-off error from accumulating *
@@ -694,7 +696,7 @@ void try_place(t_placer_opts placer_opts,
694696
outer_loop_recompute_criticalities(placer_opts, num_connections,
695697
crit_exponent, bb_cost, &place_delay_value, &timing_cost, &delay_cost,
696698
&outer_crit_iter_count, &inverse_prev_timing_cost, &inverse_prev_bb_cost,
697-
pb_gpin_lookup,
699+
netlist_pin_lookup,
698700
#ifdef ENABLE_CLASSIC_VPR_STA
699701
slacks,
700702
timing_inf,
@@ -712,7 +714,7 @@ void try_place(t_placer_opts placer_opts,
712714
slacks,
713715
timing_inf,
714716
#endif
715-
pb_gpin_lookup,
717+
netlist_pin_lookup,
716718
*timing_info);
717719

718720
tot_iter += move_lim;
@@ -868,7 +870,7 @@ static void outer_loop_recompute_criticalities(t_placer_opts placer_opts,
868870
float * place_delay_value, float * timing_cost, float * delay_cost,
869871
int * outer_crit_iter_count, float * inverse_prev_timing_cost,
870872
float * inverse_prev_bb_cost,
871-
const IntraLbPbPinLookup& pb_gpin_lookup,
873+
const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
872874
#ifdef ENABLE_CLASSIC_VPR_STA
873875
t_slack* slacks,
874876
t_timing_inf timing_inf,
@@ -892,7 +894,7 @@ static void outer_loop_recompute_criticalities(t_placer_opts placer_opts,
892894

893895
//Per-temperature timing update
894896
timing_info.update();
895-
load_criticalities(timing_info, crit_exponent, pb_gpin_lookup);
897+
load_criticalities(timing_info, crit_exponent, netlist_pin_lookup);
896898

897899
#ifdef ENABLE_CLASSIC_VPR_STA
898900
load_timing_graph_net_delays(point_to_point_delay_cost);
@@ -922,7 +924,7 @@ static void placement_inner_loop(float t, float rlim, t_placer_opts placer_opts,
922924
t_slack* slacks,
923925
t_timing_inf timing_inf,
924926
#endif
925-
const IntraLbPbPinLookup& pb_gpin_lookup,
927+
const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
926928
SetupTimingInfo& timing_info) {
927929

928930
int inner_crit_iter_count, inner_iter;
@@ -978,7 +980,7 @@ static void placement_inner_loop(float t, float rlim, t_placer_opts placer_opts,
978980
*/
979981
//Inner loop timing update
980982
timing_info.update();
981-
load_criticalities(timing_info, crit_exponent, pb_gpin_lookup);
983+
load_criticalities(timing_info, crit_exponent, netlist_pin_lookup);
982984

983985
#ifdef ENABLE_CLASSIC_VPR_STA
984986
load_timing_graph_net_delays(point_to_point_delay_cost);

vpr/src/place/timing_place.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ static void free_crit(vtr::t_chunk *chunk_list_ptr){
4949
}
5050

5151
/**************************************/
52-
void load_criticalities(SetupTimingInfo& timing_info, float crit_exponent, const IntraLbPbPinLookup& pb_gpin_lookup) {
52+
void load_criticalities(SetupTimingInfo& timing_info, float crit_exponent, const ClusteredPinAtomPinsLookup& pin_lookup) {
5353
/* Performs a 1-to-1 mapping from criticality to f_timing_place_crit.
5454
For every pin on every net (or, equivalently, for every tedge ending
5555
in that pin), f_timing_place_crit = criticality^(criticality exponent) */
@@ -58,14 +58,17 @@ void load_criticalities(SetupTimingInfo& timing_info, float crit_exponent, const
5858
for (auto net_id : cluster_ctx.clb_nlist.nets()) {
5959
if (cluster_ctx.clb_nlist.net_is_global(net_id))
6060
continue;
61-
for (size_t ipin = 1; ipin < cluster_ctx.clb_nlist.net_pins(net_id).size(); ipin++) {
62-
float clb_pin_crit = calculate_clb_net_pin_criticality(timing_info, pb_gpin_lookup, net_id, ipin);
61+
62+
for (auto clb_pin : cluster_ctx.clb_nlist.net_sinks(net_id)) {
63+
int ipin = cluster_ctx.clb_nlist.pin_net_index(clb_pin);
64+
65+
float clb_pin_crit = calculate_clb_net_pin_criticality(timing_info, pin_lookup, clb_pin);
6366

6467
/* The placer likes a great deal of contrast between criticalities.
6568
Since path criticality varies much more than timing, we "sharpen" timing
6669
criticality by taking it to some power, crit_exponent (between 1 and 8 by default). */
6770
f_timing_place_crit[net_id][ipin] = pow(clb_pin_crit, crit_exponent);
68-
}
71+
}
6972
}
7073
}
7174

vpr/src/place/timing_place.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#define TIMING_PLACE
33

44
#include "timing_info_fwd.h"
5+
#include "clustered_netlist_utils.h"
56

67
void alloc_lookups_and_criticalities(t_chan_width_dist chan_width_dist,
78
t_router_opts router_opts,
@@ -11,7 +12,7 @@ void alloc_lookups_and_criticalities(t_chan_width_dist chan_width_dist,
1112

1213
void free_lookups_and_criticalities();
1314

14-
void load_criticalities(SetupTimingInfo& timing_info, float crit_exponent, const IntraLbPbPinLookup& pb_gpin_lookup);
15+
void load_criticalities(SetupTimingInfo& timing_info, float crit_exponent, const ClusteredPinAtomPinsLookup& pin_lookup);
1516

1617
float get_timing_place_crit(ClusterNetId net_id, int ipin);
1718
void set_timing_place_crit(ClusterNetId net_id, int ipin, float val);

vpr/src/route/connection_based_routing.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ class Connection_based_routing_resources {
120120
// and if so, mark them to be rerouted
121121
bool forcibly_reroute_connections(float max_criticality,
122122
std::shared_ptr<const SetupTimingInfo> timing_info,
123-
const IntraLbPbPinLookup& pb_gpin_lookup,
123+
const ClusteredPinAtomPinsLookup& netlist_pin_lookup,
124124
vtr::vector_map<ClusterNetId, float *> &net_delay);
125125

126126
};

0 commit comments

Comments
 (0)