verilog-to-routing
diff --git a/‎.github/workflows/labeler.yml
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/labeler.yml
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/test.yml
Lines changed: 12 additions & 0 deletions b/‎.github/workflows/test.yml
Lines changed: 12 additions & 0 deletions
diff --git a/‎.gitignore
Lines changed: 13 additions & 0 deletions b/‎.gitignore
Lines changed: 13 additions & 0 deletions
diff --git a/‎CHANGELOG.md
Lines changed: 7 additions & 0 deletions b/‎CHANGELOG.md
Lines changed: 7 additions & 0 deletions
diff --git a/‎CMakeLists.txt
Lines changed: 9 additions & 1 deletion b/‎CMakeLists.txt
Lines changed: 9 additions & 1 deletion
diff --git a/‎README.developers.md
Lines changed: 52 additions & 10 deletions b/‎README.developers.md
Lines changed: 52 additions & 10 deletions
diff --git a/‎doc/README
Lines changed: 2 additions & 2 deletions b/‎doc/README
Lines changed: 2 additions & 2 deletions
diff --git a/‎doc/src/vpr/command_line_usage.rst
Lines changed: 6 additions & 6 deletions b/‎doc/src/vpr/command_line_usage.rst
Lines changed: 6 additions & 6 deletions
diff --git a/‎doc/src/vtr/benchmarks.rst
Lines changed: 14 additions & 1 deletion b/‎doc/src/vtr/benchmarks.rst
Lines changed: 14 additions & 1 deletion
diff --git a/‎doc/src/vtr/get_vtr.rst
Lines changed: 10 additions & 0 deletions b/‎doc/src/vtr/get_vtr.rst
Lines changed: 10 additions & 0 deletions
diff --git a/‎libs/EXTERNAL/libargparse/README.md
Lines changed: 1 addition & 1 deletion b/‎libs/EXTERNAL/libargparse/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎libs/EXTERNAL/libargparse/argparse_test.cpp
Lines changed: 4 additions & 3 deletions b/‎libs/EXTERNAL/libargparse/argparse_test.cpp
Lines changed: 4 additions & 3 deletions
@@ -1,6 +1,6 @@
 name: "Pull Request Labeler"
 on:
-- pull_request
+- pull_request_target
 
 jobs:
   triage:
 
@@ -197,6 +197,11 @@ jobs:
             params: '-DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on -DVPR_USE_EZGL=off',
             suite: 'vtr_reg_basic'
           },
+          {
+            name: 'Basic with CAPNPROTO disabled',
+            params: '-DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on -DVTR_ENABLE_CAPNPROTO=off',
+            suite: 'vtr_reg_basic'
+          },
           {
             name: 'Basic with VTR_ENABLE_DEBUG_LOGGING',
             params: '-DVTR_ASSERT_LEVEL=3 -DWITH_BLIFEXPLORER=on -DVTR_ENABLE_DEBUG_LOGGING=on',
@@ -297,7 +302,13 @@ jobs:
         CMAKE_PARAMS: ${{ matrix.params }}
         BUILD_TYPE: debug
         LSAN_OPTIONS: 'exitcode=42' #Use a non-standard exit code to ensure LSAN errors are detected
+        # In Ubuntu 20240310.1.0, the entropy of ASLR has increased (28 -> 32). LLVM 14 in this
+        # image is not compatible with this increased ASLR entropy. Apparently, memory sanitizer
+        # depends on LLVM and all CI tests where VTR_ENABLE_SANITIZE is enabled fail. For a temporary
+        # fix, we manually reduce the entropy. This quick fix should be removed in the future
+        # when github deploys a more stable Ubuntu image.
       run: |
+        sudo sysctl -w vm.mmap_rnd_bits=28
         export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
         ./.github/scripts/build.sh
         # We skip QoR since we are only checking for errors in sanitizer runs
@@ -349,6 +360,7 @@ jobs:
         CMAKE_PARAMS: '-DVTR_ASSERT_LEVEL=3 -DVTR_ENABLE_SANITIZE=on -DVTR_IPO_BUILD=off -DWITH_BLIFEXPLORER=on -DWITH_PARMYS=OFF -DWITH_ODIN=on'
         BUILD_TYPE: debug
       run: |
+        sudo sysctl -w vm.mmap_rnd_bits=28
         export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
         ./.github/scripts/build.sh
         ./run_reg_test.py odin_reg_basic -show_failures -j2
 
@@ -38,6 +38,19 @@ vtr_flow/benchmarks/titan_blif/titan23
 vtr_flow/benchmarks/titan_blif/titan_new
 
 
+#
+# NoC MLP benchmarks
+#
+#   We ignore blif and vqm files because of their large size.
+#   We also ignore symbolic links to traffic flow and blif files.
+#
+vtr_flow/benchmarks/noc/Large_Designs/MLP/**/*.vqm
+vtr_flow/benchmarks/noc/Large_Designs/MLP/**/*.blif
+vtr_flow/benchmarks/noc/Large_Designs/MLP/blif_files/*
+vtr_flow/benchmarks/noc/Large_Designs/MLP/traffic_flow_files/*
+MLP_Benchmark_Netlist_Files_blif.tar.gz
+MLP_Benchmark_Netlist_Files_vqm_blif.tar.gz
+
 #
 # ISPD benchmarks
 #
 
@@ -117,6 +117,13 @@ _The following are changes which have been implemented in the VTR master branch
 ### Deprecated
  * VPR's breadth-first router (use the timing-driven router, which provides superior QoR and Run-time)
 
+### Docker Image
+ * A docker image is available for the VTR 8.0 release at mohamedelgammal/vtr8:latest. You can run it using the following commands:
+```
+$ sudo docker pull mohamedelgammal/vtr8:latest
+$ sudo docker run -it mohamedelgammal/vtr8:latest
+```
+ 
 ## v8.0.0-rc2 - 2019-08-01
 
 ### Changed
 
@@ -23,7 +23,7 @@ set(VTR_IPO_BUILD "auto" CACHE STRING "Should VTR be compiled with interprocedur
 set_property(CACHE VTR_IPO_BUILD PROPERTY STRINGS auto on off)
 
 #Allow the user to configure how much assertion checking should occur
-set(VTR_ASSERT_LEVEL "2" CACHE STRING "VTR assertion checking level. 0: no assertions, 1: fast assertions, 2: regular assertions, 3: additional assertions with noticable run-time overhead, 4: all assertions (including those with significant run-time cost)")
+set(VTR_ASSERT_LEVEL "2" CACHE STRING "VTR assertion checking level. 0: no assertions, 1: fast assertions, 2: regular assertions, 3: additional assertions with noticeable run-time overhead, 4: all assertions (including those with significant run-time cost)")
 set_property(CACHE VTR_ASSERT_LEVEL PROPERTY STRINGS 0 1 2 3 4)
 
 option(VTR_ENABLE_STRICT_COMPILE "Specifies whether compiler warnings should be treated as errors (e.g. -Werror)" OFF)
@@ -341,6 +341,14 @@ add_custom_target(get_titan_benchmarks
     WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
     COMMENT "Downloading (~1GB) and extracting Titan benchmarks (~10GB) into VTR source tree.")
 
+#
+# NoC MLP Benchmarks
+#
+add_custom_target(get_noc_mlp_benchmarks
+        COMMAND ./vtr_flow/scripts/download_noc_mlp.py --vtr_flow_dir ./vtr_flow
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
+        COMMENT "Downloading (~100MB) and extracting NoC MLP benchmarks (~3.2GB) into VTR source tree.")
+
 #
 # ISPD Benchmarks
 #
 
@@ -386,17 +386,21 @@ The following are key QoR metrics which should be used to evaluate the impact of
 
 Implementation Quality Metrics:
 
-| Metric                      | Meaning                                                                  | Sensitivity |
-|-----------------------------|--------------------------------------------------------------------------|-------------|
-| num_pre_packed_blocks       | Number of primitive netlist blocks (after tech. mapping, before packing) | Low         |
-| num_post_packed_blocks      | Number of Clustered Blocks (after packing)                               | Medium      |
-| device_grid_tiles           | FPGA size in grid tiles                                                  | Low-Medium  |
-| min_chan_width              | The minimum routable channel width                                       | Medium\*    |
-| crit_path_routed_wirelength | The routed wirelength at the relaxed channel width                       | Medium      |
-| critical_path_delay         | The critical path delay at the relaxed channel width                     | Medium-High |
+| Metric                          | Meaning                                                                      | Sensitivity |
+|---------------------------------|------------------------------------------------------------------------------|-------------|
+| num_pre_packed_blocks           | Number of primitive netlist blocks (after tech. mapping, before packing)     | Low         |
+| num_post_packed_blocks          | Number of Clustered Blocks (after packing)                                   | Medium      |
+| device_grid_tiles               | FPGA size in grid tiles                                                      | Low-Medium  |
+| min_chan_width                  | The minimum routable channel width                                           | Medium\*    |
+| crit_path_routed_wirelength     | The routed wirelength at the relaxed channel width                           | Medium      |
+| NoC_agg_bandwidth\**            | The total link bandwidth utilized by all traffic flows                       | Low         |
+| NoC_latency\**                  | The total time of traffic flow data transfer (summed over all traffic flows) | Low         |
+| NoC_latency_constraints_cost\** | Total number of traffic flows that meet their latency constraints            | Low         |
 
 \* By default, VPR attempts to find the minimum routable channel width; it then performs routing at a relaxed (e.g. 1.3x minimum) channel width. At minimum channel width routing congestion can distort the true timing/wirelength characteristics. Combined with the fact that most FPGA architectures are built with an abundance of routing, post-routing metrics are usually only evaluated at the relaxed channel width.
 
+\** NoC-related metrics are only reported when --noc option is enabled.
+
 Run-time/Memory Usage Metrics:
 
 | Metric                      | Meaning                                                                        | Sensitivity |
@@ -493,7 +497,7 @@ k6_frac_N10_frac_chain_mem32K_40nm.xml	boundtop.v        	common       	9f591f6-
 k6_frac_N10_frac_chain_mem32K_40nm.xml	ch_intrinsics.v   	common       	9f591f6-dirty	success   	     	363                	493                  	270                 	247                   	10          	10           	17     	99    	130        	1           0       	1792                 	1.86527       	-194.602            	-1.86527            	46            	1562             	13                                    	1438                       	20                               	2.4542             	-226.033 	-2.4542  	0       	0       	3.92691e+06           	1.4642e+06           	259806.                          	2598.06                             	333135.                     	3331.35                        	0.03           	0.01          	-1          	-1          	-1      	0.46     	0.31      	0.94                     	0.09                	2.59                 	62684      	8672        	32940
 ```
 
-### Example: Titan Benchmarks QoR Measurements
+### Example: Titan Benchmarks QoR Measurement
 
 The [Titan benchmarks](https://docs.verilogtorouting.org/en/latest/vtr/benchmarks/#titan-benchmarks) are a group of large benchmark circuits from a wide range of applications, which are compatible with the VTR project.
 They are typically used as post-technology mapped netlists which have been pre-synthesized with Quartus.
@@ -511,7 +515,7 @@ $ make get_titan_benchmarks
 #Move to the task directory
 $ cd vtr_flow/tasks
 
-#Run the VTR benchmarks
+#Run the Titan benchmarks
 $ ../scripts/run_vtr_task.py regression_tests/vtr_reg_nightly_test2/titan_quick_qor
 
 #Several days later... they complete
@@ -528,6 +532,44 @@ stratixiv_arch.timing.xml	stereo_vision_stratixiv_arch_timing.blif	0208312
 stratixiv_arch.timing.xml	cholesky_mc_stratixiv_arch_timing.blif  	0208312     	success   	     	140214             	108592               	67410               	5444                  	121         	90           	-1     	111   	151        	-1          -1      	5221059              	8.16972       	-454610             	-8.16972            	1518597          	15                               	0                     	0                    	2.38657e+08       	21915.3              	9.34704            	-531231     -9.34704 	0       	0       	211.12   	364.32    	490.24              	6356252    	-1          	-1
 ```
 
+### Example: NoC Benchmarks QoR Measurement
+NoC benchmarks currently include synthetic and MLP benchmarks. Synthetic benchmarks have various NoC traffic patterns,
+bandwidth utilization, and latency requirements. High-quality NoC router placement solutions for these benchmarks are
+known. By comparing the known solutions with NoC router placement results, the developer can evaluate the sanity of 
+the NoC router placement algorithm. MLP benchmarks are the only realistic netlists included in this benchmark set.
+
+Based on the number of NoC routers in a synthetic benchmark, it is run on one of two different architectures. All MLP
+benchmarks are run on an FPGA architecture with 16 NoC routers. Post-technology mapped netlists (blif files)
+for synthetic benchmarks are added to the VTR project. However, MLP blif files are very large and should be downloaded
+separately.
+
+Since NoC benchmarks target different FPGA architectures, they are run as different circuits. A typical way to run all
+NoC benchmarks is to run a task list and gather QoR data from different tasks:
+
+#### Running and Integrating the NoC Benchmarks with VTR
+```shell
+#From the VTR root
+
+#Download and integrate NoC MLP benchmarks into the VTR source tree
+$ make get_noc_mlp_benchmarks
+
+#Move to the task directory
+$ cd vtr_flow
+
+#Run the NoC benchmarks
+$ scripts/run_vtr_task.py -l tasks/noc_qor/task_list.txt
+
+#Several days later... they complete
+
+#NoC benchmarks are run as several different tasks. Therefore, QoR results should be gathered from multiple directories,
+#one for each task.
+$ head -5 tasks/noc_qor/large_complex_synthetic/latest/parse_results.txt
+$ head -5 tasks/noc_qor/large_simple_synthetic/latest/parse_results.txt
+$ head -5 tasks/noc_qor/small_complex_synthetic/latest/parse_results.txt
+$ head -5 tasks/noc_qor/small_simple_synthetic/latest/parse_results.txt
+$ head -5 tasks/noc_qor/MLP/latest/parse_results.txt
+```
+
 ### Example: Koios Benchmarks QoR Measurement
 
 The [Koios benchmarks](https://github.com/verilog-to-routing/vtr-verilog-to-routing/tree/master/vtr_flow/benchmarks/verilog/koios) are a group of Deep Learning benchmark circuits distributed with the VTR project.
 
@@ -4,7 +4,7 @@ Overview
 The VTR documentation is generated using sphinx, a python based documentation generator.
 
 The documentation itself is written in re-structured text (files ending in .rst), which
-is a lightwieght mark-up language for text documents.
+is a lightweight mark-up language for text documents.
 
 Currently VTR's documentation is automatically built by https://readthedocs.org/projects/vtr/ and is served at:
 
@@ -36,7 +36,7 @@ from the main documentation directory (i.e. <vtr_root>/doc).
 
 This will produce the output html in the _build directory.
 
-You can then view the resulting documention with the web-browser of your choice.
+You can then view the resulting documentation with the web-browser of your choice.
 For instance:
 
     $ firefox _build/html/index.html
 
@@ -350,7 +350,7 @@ Use the options below to override this default naming behaviour.
 
 .. option:: --read_rr_graph <file>
 
-    Reads in the routing resource graph named <file> loads it for use during the placement and routing stages. Expects a file extension of either ``.xml`` and ``.bin``.
+    Reads in the routing resource graph named <file> loads it for use during the placement and routing stages. Expects a file extension of either ``.xml`` or ``.bin``.
 
     The routing resource graph overthrows all the architecture definitions regarding switches, nodes, and edges. Other information such as grid information, block types, and segment information are matched with the architecture file to ensure accuracy.
 
@@ -368,22 +368,22 @@ Use the options below to override this default naming behaviour.
 
 .. option:: --read_router_lookahead <file>
 
-    Reads the lookahead data from the specified file instead of computing it.
+    Reads the lookahead data from the specified file instead of computing it. Expects a file extension of either ``.capnp`` or ``.bin``.
 
 .. option:: --write_router_lookahead <file>
 
-    Writes the lookahead data to the specified file.
+    Writes the lookahead data to the specified file. Accepted file extensions are ``.capnp``, ``.bin``, and ``.csv``.
 
 .. option:: --read_placement_delay_lookup <file>
 
-    Reads the placement delay lookup from the specified file instead of computing it.
+    Reads the placement delay lookup from the specified file instead of computing it. Expects a file extension of either ``.capnp`` or ``.bin``.
 
 .. option:: --write_placement_delay_lookup <file>
 
-    Writes the placement delay lookup to the specified file.
+    Writes the placement delay lookup to the specified file. Expects a file extension of either ``.capnp`` or ``.bin``.
 .. option:: --write_initial_place_file <file>
 
-    Writes out the the placement chosen by the initial placement algorithm to the specified file
+    Writes out the placement chosen by the initial placement algorithm to the specified file.
 
 .. option:: --outfile_prefix <string>
 
 
@@ -191,7 +191,20 @@ The SymbiFlow benchmarks can be downloaded and extracted by running the followin
     cd $VTR_ROOT
     make get_symbiflow_benchmarks
 
-Once downloaded and extracted, benchmarks are provided as post-synthesized eblif files under: ::
+Once downloaded and extracted, benchmarks are provided as post-synthesized blif files under: ::
 
     $VTR_ROOT/vtr_flow/benchmarks/symbiflow
 
+.. _noc_benchmarks:
+
+NoC Benchmarks
+----------------
+NoC benchmarks are composed of synthetic and MLP benchmarks and target NoC-enhanced FPGA architectures. Synthetic
+benchmarks include a wide variety of traffic flow patterns and are divided into two groups: 1) simple and 2) complex
+benchmarks. As their names imply, simple benchmarks use very simple and small logic modules connected to NoC routers,
+while complex benchmarks implement more complicated functionalities like encryption. These benchmarks do not come from
+real application domains. On the other hand, MLP benchmarks include modules that perform matrix-vector multiplication
+and move data. Pre-synthesized netlists for the synthetic benchmarks are added to the VTR project, but MLP netlists should
+be downloaded separately.
+
+.. note:: The NoC MLP benchmarks are not included with the VTR release (due to their size). However, they can be downloaded and extracted by running ``make get_noc_mlp_benchmarks`` from the root of the VTR tree.  They can also be `downloaded manually <https://www.eecg.utoronto.ca/~vaughn/titan/>`_.
@@ -31,6 +31,16 @@ The official VTR release is available from:
 
     https://verilogtorouting.org/download
 
+VTR Docker Image
+~~~~~~~~~~~~~~~~
+A docker image for VTR is available. This image provides all the required packages and python libraries. However, this ease of compiling and running comes at the cost of some runtime increase (<10%). To pull and run the docker image of the latest VTR repository, you can run the following commands:
+
+.. code-block:: bash
+
+    > sudo docker pull mohamedelgammal/vtr-master:latest
+    > sudo docker run -it mohamedelgammal/vtr-master:latest
+
+
 Release
 ~~~~~~~
 
 
@@ -2,7 +2,7 @@ libargparse
 ===========
 This is (yet another) simple command-line parser for C++ applications, inspired by Python's argparse module.
 
-It requires only a C++11 compiler, and has no external dependancies.
+It requires only a C++11 compiler, and has no external dependencies.
 
 One of the advantages of libargparse is that all conversions from command-line strings to program types (bool, int etc.) are performed when the command line is parsed (and not when the options are accessed).
 This avoids command-line related errors from showing up deep in the program execution, which can be problematic for long-running programs.
 
@@ -399,10 +399,11 @@ int main(
             .show_in(argparse::ShowIn::HELP_ONLY);
     route_grp.add_argument(args.router_algorithm, "--router_algorithm")
             .help("Specifies the router algorithm to use.\n"
-                  " * parallel: timing_driven with tricks to run on multiple cores (may be worse)\n"
-                  " * timing driven: focuses on routability and circuit speed\n")
+                  " * timing driven: focuses on routability and circuit speed [default]\n"
+                  " * parallel: timing_driven with nets in different regions of the chip routed in parallel\n"
+                  " * parallel_decomp: timing_driven with additional parallelism obtained by decomposing high-fanout nets, possibly reducing quality\n")
             .default_value("timing_driven")
-            .choices({"parallel", "timing_driven"})
+            .choices({"parallel", "parallel_decomp", "timing_driven"})
             .show_in(argparse::ShowIn::HELP_ONLY);
     route_grp.add_argument(args.min_incremental_reroute_fanout, "--min_incremental_reroute_fanout")
             .help("The net fanout thershold above which nets will be re-routed incrementally.")