Skip to content

Commit ccc5aa4

Browse files
add comments for HBM-style traffic flow files
1 parent e7058da commit ccc5aa4

7 files changed

+110
-0
lines changed

vtr_flow/benchmarks/noc/Synthetic_Designs/congestion_traffic_flow_files/complex_16_noc_2way_ring.flows

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
<traffic_flows>
22

3+
<!--
4+
In this benchmark, each NoC router sends traffic flows to two
5+
NoC routers after itself. These two traffic flows cannot share the
6+
same NoC link without causing congestion. Therefore, they should be
7+
routed without using any shared links. This benchmark is a simple
8+
sanity check for congestion-aware NoC placement and traffic flow routing
9+
algorithms.
10+
-->
11+
312
<single_flow src=".*noc_router_adapter_block_1[^\d].*" dst=".*noc_router_adapter_block_2.*" bandwidth="7e5" />
413
<single_flow src=".*noc_router_adapter_block_2.*" dst=".*noc_router_adapter_block_3.*" bandwidth="7e5" />
514
<single_flow src=".*noc_router_adapter_block_3.*" dst=".*noc_router_adapter_block_4.*" bandwidth="7e5" />

vtr_flow/benchmarks/noc/Synthetic_Designs/congestion_traffic_flow_files/complex_4_noc_2way_ring.flows

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
<traffic_flows>
22

3+
<!--
4+
In this benchmark, each NoC router sends traffic flows to two
5+
NoC routers after itself. These two traffic flows cannot share the
6+
same NoC link without causing congestion. Therefore, they should be
7+
routed without using any shared links. This benchmark is a simple
8+
sanity check for congestion-aware NoC placement and traffic flow routing
9+
algorithms.
10+
-->
11+
312
<single_flow src=".*noc_router_adapter_block_1.*" dst=".*noc_router_adapter_block_2.*" bandwidth="7e5" />
413
<single_flow src=".*noc_router_adapter_block_2.*" dst=".*noc_router_adapter_block_3.*" bandwidth="7e5" />
514
<single_flow src=".*noc_router_adapter_block_3.*" dst=".*noc_router_adapter_block_4.*" bandwidth="7e5" />

vtr_flow/benchmarks/noc/Synthetic_Designs/congestion_traffic_flow_files/complex_64_noc_bucket_sort.flows

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,23 @@
11
<traffic_flows>
22

3+
<!--
4+
Based on the architecture proposed in [1],
5+
we lock down 8 NoC routers at two opposite sides of
6+
the device. Each PE line is a chain of NoC-attached PEs
7+
starting from one of the bottom NoC routers and ending at
8+
one of the top routers. There are two fully-connected layers
9+
that redistribute the data between PE lines, complicating a
10+
congestion-free mapping. Fixed NoC routers are specified in
11+
complex_64_noc_bucket_sort.fixed file, which is to be passed
12+
to VPR by the --fix_clusters command-line option. The fixed clusters file
13+
assumes that the stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml
14+
architecture with EP4SE820 is selected.
15+
16+
[1] N. Samardzic, W. Qiao, V. Aggarwal, M.-C. F. Chang, and J. Cong,
17+
“Bonsai: High-performance adaptive merge tree sorting,” in 2020
18+
ACM/IEEE 47th Annual International Symposium on Computer Architecture (ISCA), pp. 282–294, IEEE, 2020.
19+
-->
20+
321
<single_flow src=".*noc_router_adapter_block_1[^\d].*" dst=".*noc_router_adapter_block_2[^\d].*" bandwidth="1e6" />
422
<single_flow src=".*noc_router_adapter_block_2[^\d].*" dst=".*noc_router_adapter_block_3[^\d].*" bandwidth="1e6" />
523
<single_flow src=".*noc_router_adapter_block_5[^\d].*" dst=".*noc_router_adapter_block_6[^\d].*" bandwidth="1e6" />

vtr_flow/benchmarks/noc/Synthetic_Designs/congestion_traffic_flow_files/complex_64_noc_gaussian_elimination.flows

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,27 @@
11
<traffic_flows>
22

3+
<!--
4+
Inspired by the triangular topology
5+
introduced in [1], we lock down NoC routers at opposite
6+
corners of the device, while NoC-attached PEs transmit
7+
traffic flows in a triangular pattern. When the triangle’s
8+
dimensions are smaller than the NoC, a congestion-free
9+
mapping can be easily found. However, in our benchmarks,
10+
perpendicular sides of the triangle have 11 NoC routers
11+
while the FPGA being targeted contains a 10x10 mesh of
12+
physical NoC routers, making a congestion-free solution
13+
challenging. Fixed NoC routers are specified in
14+
complex_64_noc_gaussian_elimination.fixed file, which is to be passed
15+
to VPR by the --fix_clusters command-line option. The fixed clusters file
16+
assumes that the stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml
17+
architecture with EP4SE820 is selected.
18+
19+
[1] J. Wang, L. Guo, and J. Cong, “AutoSA: A polyhedral compiler for
20+
high-performance systolic arrays on FPGA,” in The 2021 ACM/SIGDA
21+
International Symposium on Field-Programmable Gate Arrays, pp. 93–
22+
104, 2021.
23+
-->
24+
325
<single_flow src=".*noc_router_adapter_block_1[^\d].*" dst=".*noc_router_adapter_block_2[^\d].*" bandwidth="1e6" latency_cons="7e-9" />
426
<single_flow src=".*noc_router_adapter_block_2[^\d].*" dst=".*noc_router_adapter_block_3[^\d].*" bandwidth="1e6" latency_cons="7e-9" />
527
<single_flow src=".*noc_router_adapter_block_3[^\d].*" dst=".*noc_router_adapter_block_4[^\d].*" bandwidth="1e6" latency_cons="7e-9" />

vtr_flow/benchmarks/noc/Synthetic_Designs/congestion_traffic_flow_files/complex_64_noc_genome_seq.flows

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,29 @@
11
<traffic_flows>
22

3+
<!--
4+
5+
Inspired by the architecture proposed
6+
in [1], we lock down two logical NoC routers on opposite
7+
sides of an FPGA device to emulate hardened memory
8+
controllers. This benchmark consists of nine processing
9+
element (PE) lines where multiple NoC-attached PEs are
10+
connected in a chain. The first fixed NoC router transmits
11+
traffic flows to the first PE in each line, while the last PE
12+
in each line transmits a traffic flow to the other fixed NoC
13+
router. Fixed NoC routers are specified in
14+
complex_64_noc_genome_seq.fixed file, which is to be passed
15+
to VPR by the --fix_clusters command-line option. The fixed clusters file
16+
assumes that the stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml
17+
architecture with EP4SE820 is selected.
18+
19+
[1] L. Guo, J. Lau, Z. Ruan, P. Wei, and J. Cong, “Hardware acceleration of
20+
long read pairwise overlapping in genome sequencing: A race between
21+
FPGA and GPU,” in 2019 IEEE 27th Annual International Symposium on
22+
Field-Programmable Custom Computing Machines (FCCM),
23+
pp. 127–135, IEEE, 2019.
24+
-->
25+
26+
327
<single_flow src=".*noc_router_adapter_block_1[^\d].*" dst=".*noc_router_adapter_block_2[^\d].*" bandwidth="3.33e5" />
428
<single_flow src=".*noc_router_adapter_block_2[^\d].*" dst=".*noc_router_adapter_block_3[^\d].*" bandwidth="1e6" />
529
<single_flow src=".*noc_router_adapter_block_3[^\d].*" dst=".*noc_router_adapter_block_4[^\d].*" bandwidth="1e6" />

vtr_flow/benchmarks/noc/Synthetic_Designs/congestion_traffic_flow_files/complex_64_noc_page_rank.flows

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,24 @@
11
<traffic_flows>
22

3+
<!--
4+
Similar to the genome sequencing benchmark,
5+
a logical NoC router, which distributes traffic flows to nine
6+
PE lines, is locked down at the bottom of the device.
7+
Inter-PE traffic flows in each line flow in both directions. At the
8+
end of each line, a logical NoC router is locked down at the
9+
top of the device to emulate an external memory interface.
10+
This traffic flow pattern was inspired by the architecture
11+
introduced in [1]. Fixed NoC routers are specified in
12+
complex_64_noc_page_rank.fixed file, which is to be passed
13+
to VPR by the --fix_clusters command-line option. The fixed clusters file
14+
assumes that the stratixiv_arch.timing_with_a_embedded_10X10_mesh_noc_topology.xml
15+
architecture with EP4SE820 is selected.
16+
17+
[1] Y. Chi, L. Guo, J. Lau, Y.-k. Choi, J. Wang, and J. Cong, “Extending
18+
high-level synthesis for task-parallel programs,” in 2021 IEEE 29th
19+
Annual International Symposium on Field-Programmable Custom Computing Machines (FCCM), pp. 204–213, IEEE, 2021.
20+
-->
21+
322
<single_flow src=".*noc_router_adapter_block_1[^\d].*" dst=".*noc_router_adapter_block_2[^\d].*" bandwidth="3.33e5" />
423
<single_flow src=".*noc_router_adapter_block_2[^\d].*" dst=".*noc_router_adapter_block_3[^\d].*" bandwidth="1e6" />
524
<single_flow src=".*noc_router_adapter_block_3[^\d].*" dst=".*noc_router_adapter_block_2[^\d].*" bandwidth="1e6" />

vtr_flow/benchmarks/noc/Synthetic_Designs/congestion_traffic_flow_files/complex_8_noc_2way_ring.flows

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
<traffic_flows>
22

3+
<!--
4+
In this benchmark, each NoC router sends traffic flows to two
5+
NoC routers after itself. These two traffic flows cannot share the
6+
same NoC link without causing congestion. Therefore, they should be
7+
routed without using any shared links. This benchmark is a simple
8+
sanity check for congestion-aware NoC placement and traffic flow routing
9+
algorithms.
10+
-->
11+
312
<single_flow src=".*noc_router_adapter_block_1.*" dst=".*noc_router_adapter_block_2.*" bandwidth="7e5" />
413
<single_flow src=".*noc_router_adapter_block_2.*" dst=".*noc_router_adapter_block_3.*" bandwidth="7e5" />
514
<single_flow src=".*noc_router_adapter_block_3.*" dst=".*noc_router_adapter_block_4.*" bandwidth="7e5" />

0 commit comments

Comments
 (0)