diff --git a/.clang-format b/.clang-format
index fa6b26413d0..774aba72016 100644
--- a/.clang-format
+++ b/.clang-format
@@ -15,7 +15,7 @@ AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: false
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
-AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakBeforeMultilineStrings: false
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: false
@@ -34,7 +34,7 @@ BraceWrapping:
SplitEmptyFunction: false
SplitEmptyRecord: true
SplitEmptyNamespace: true
-BreakBeforeBinaryOperators: All
+BreakBeforeBinaryOperators: NonAssignment
BreakBeforeBraces: Custom
BreakBeforeInheritanceComma: false
BreakBeforeTernaryOperators: true
@@ -68,10 +68,11 @@ IncludeIsMainRegex: '([-_](test|unittest))?$'
IndentCaseLabels: true
IndentWidth: 4
IndentWrappedFunctionNames: false
-IndentPPDirectives: AfterHash
+IndentPPDirectives: None
+InsertNewlineAtEOF: true
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
-KeepEmptyLinesAtTheStartOfBlocks: false
+KeepEmptyLinesAtTheStartOfBlocks: true
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
diff --git a/.github/scripts/install_dependencies.sh b/.github/scripts/install_dependencies.sh
index b5f5388f685..61ad671d30b 100755
--- a/.github/scripts/install_dependencies.sh
+++ b/.github/scripts/install_dependencies.sh
@@ -55,7 +55,7 @@ sudo apt install -y \
clang-16 \
clang-17 \
clang-18 \
- clang-format-14 \
+ clang-format-18 \
libtbb-dev \
openssl
diff --git a/.github/workflows/nightly_test.yml b/.github/workflows/nightly_test.yml
index a57c502e0d3..a6782293b5d 100644
--- a/.github/workflows/nightly_test.yml
+++ b/.github/workflows/nightly_test.yml
@@ -22,8 +22,6 @@ on:
# - '**.md'
# - '**.rst'
workflow_dispatch:
- schedule:
- - cron: '0 0 * * *' # daily
# We want to cancel previous runs for a given PR or branch / ref if another CI
# run is requested.
@@ -65,9 +63,9 @@ jobs:
- {test: "vtr_reg_strong", cores: "16", options: "", cmake: "-DVTR_ASSERT_LEVEL=3", extra_pkgs: "libeigen3-dev"}
- {test: "vtr_reg_strong_odin", cores: "16", options: "", cmake: "-DVTR_ASSERT_LEVEL=3 -DWITH_ODIN=ON", extra_pkgs: "libeigen3-dev"}
- {test: "vtr_reg_strong_odin", cores: "16", options: "-skip_qor", cmake: "-DVTR_ASSERT_LEVEL=3 -DVTR_ENABLE_SANITIZE=ON -DWITH_ODIN=ON", extra_pkgs: "libeigen3-dev"}
- # - {test: "vtr_reg_system_verilog", cores: "16", options: "", cmake: "-DYOSYS_F4PGA_PLUGINS=ON", extra_pkgs: ""} # Test turned off -> F4PGA conflicts with Yosys (version 42)
+ - {test: "vtr_reg_system_verilog", cores: "16", options: "", cmake: "-DSYNLIG_SYSTEMVERILOG=ON", extra_pkgs: ""}
- {test: "odin_reg_strong", cores: "16", options: "", cmake: "-DWITH_ODIN=ON", extra_pkgs: ""}
- - {test: "parmys_reg_strong", cores: "16", options: "", cmake: "-DYOSYS_F4PGA_PLUGINS=OFF", extra_pkgs: ""}
+ - {test: "parmys_reg_strong", cores: "16", options: "", cmake: "-DSYNLIG_SYSTEMVERILOG=ON", extra_pkgs: ""}
env:
DEBIAN_FRONTEND: "noninteractive"
diff --git a/.github/workflows/nightly_test_manual.yml b/.github/workflows/nightly_test_manual.yml
new file mode 100644
index 00000000000..f98f412eb8c
--- /dev/null
+++ b/.github/workflows/nightly_test_manual.yml
@@ -0,0 +1,106 @@
+name: NightlyTestManual
+
+# This workflow can be dispatched manually; it also runs on a weekly schedule.
+on:
+ workflow_dispatch:
+
+ # Automatically runs every Sunday 5 AM UTC.
+ # Results should be ready ~15 hours later (Sunday 8 PM UTC), on time for Monday mornings.
+ schedule:
+ - cron: '0 5 * * 0'
+
+# We want to cancel previous runs for a given PR or branch / ref if another CI
+# run is requested.
+# See: https://docs.github.com/en/actions/using-jobs/using-concurrency
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+ cancel-in-progress: true
+
+env:
+ # default compiler for all non-compatibility tests
+ MATRIX_EVAL: "CC=gcc-13 && CXX=g++-13"
+
+jobs:
+ Run-tests:
+ # Prevents this job from running on forks, where no custom runners are available
+ if: ${{ github.repository_owner == 'verilog-to-routing' }}
+
+ name: 'Nightly Tests Manual Run'
+ # This workflow is expected to take around 19 hours. Giving it 24 hours
+ # before timing out.
+ timeout-minutes: 1440
+ runs-on: [self-hosted, Linux, X64, SAVI]
+
+ steps:
+ # Clean previous runs of this workflow.
+ - name: 'Cleanup build folder'
+ run: |
+ rm -rf ./* || true
+ rm -rf ./.??* || true
+
+ # Checkout the VTR repo.
+ - uses: actions/checkout@v4
+ with:
+ submodules: 'true'
+
+ # Get the extra benchmarks
+ - name: 'Get Extra Benchmarks'
+ run: |
+ make get_titan_benchmarks
+ make get_ispd_benchmarks
+ ./dev/upgrade_vtr_archs.sh
+ make get_symbiflow_benchmarks
+
+ # Build VTR using the default build options.
+ - name: 'Build VTR'
+ run: |
+ make -j12
+ make env
+ source .venv/bin/activate
+ pip install -r requirements.txt
+
+ # Run all of the nightly tests.
+ # TODO: We could expose more parallelism if we had one task list which ran
+ # all of these.
+ - name: 'Run Nightly Test 1'
+ if: success() || failure()
+ run: |
+ source .venv/bin/activate
+ ./run_reg_test.py -j12 vtr_reg_nightly_test1
+
+ - name: 'Run Nightly Test 2'
+ if: success() || failure()
+ run: |
+ source .venv/bin/activate
+ ./run_reg_test.py -j12 vtr_reg_nightly_test2
+
+ - name: 'Run Nightly Test 3'
+ if: success() || failure()
+ run: |
+ source .venv/bin/activate
+ ./run_reg_test.py -j12 vtr_reg_nightly_test3
+
+
+ - name: 'Run Nightly Test 4'
+ if: success() || failure()
+ run: |
+ source .venv/bin/activate
+ ./run_reg_test.py -j12 vtr_reg_nightly_test4
+
+ - name: 'Run Nightly Test 5'
+ if: success() || failure()
+ run: |
+ source .venv/bin/activate
+ ./run_reg_test.py -j12 vtr_reg_nightly_test5
+
+ - name: 'Run Nightly Test 6'
+ if: success() || failure()
+ run: |
+ source .venv/bin/activate
+ ./run_reg_test.py -j12 vtr_reg_nightly_test6
+
+ - name: 'Run Nightly Test 7'
+ if: success() || failure()
+ run: |
+ source .venv/bin/activate
+ ./run_reg_test.py -j12 vtr_reg_nightly_test7
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
new file mode 100644
index 00000000000..5f6e9fd9088
--- /dev/null
+++ b/.github/workflows/stale.yml
@@ -0,0 +1,31 @@
+name: 'Close Stale Issues'
+on:
+ schedule:
+ # Run every day at 1 PM UTC
+ - cron: '0 13 * * *'
+
+jobs:
+ stale:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/stale@v9
+ with:
+ # The message to be shown for stale issues
+ stale-issue-message: 'This issue has been inactive for a year and has been marked as stale. It will be closed in 15 days if it continues to be stale. If you believe this is still an issue, please add a comment.'
+ close-issue-message: 'This issue has been marked stale for 15 days and has been automatically closed.'
+ # If you want to exempt an issue from being marked stale/deleted, label it as 'no-stale'
+ exempt-issue-labels: 'no-stale'
+ days-before-issue-stale: 365
+ days-before-issue-close: 15
+ # Start from the oldest issues
+ ascending: true
+
+ # The configuration below can be used to allow the same behaviour with PRs.
+ # Since we currently don't want to close old PRs, it is commented out but
+ # left here in case we change our mind.
+
+ # stale-pr-message: 'This PR has been inactive for a year and has been marked as stale. It will be closed in 15 days if it continues to be stale. If you are still working on this PR, please add a comment.'
+ # close-pr-message: 'This PR has been marked stale for 15 days and has been automatically closed.'
+ # exempt-pr-labels: 'no-stale'
+ # days-before-pr-stale: 365
+ # days-before-pr-close: 15
\ No newline at end of file
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index b6c665ff3ed..5977e1221ba 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -89,6 +89,8 @@ jobs:
with:
python-version: 3.10.10
- uses: actions/checkout@v4
+ with:
+ submodules: 'true'
- name: Install dependencies
run: ./.github/scripts/install_dependencies.sh
@@ -439,7 +441,6 @@ jobs:
- { name: 'GCC 11 (Ubuntu Noble - 24.04)', eval: 'CC=gcc-11 && CXX=g++-11', }
- { name: 'GCC 12 (Ubuntu Noble - 24.04)', eval: 'CC=gcc-12 && CXX=g++-12', }
- { name: 'GCC 14 (Ubuntu Noble - 24.04)', eval: 'CC=gcc-14 && CXX=g++-14', }
- - { name: 'Clang 15 (Ubuntu Noble - 24.04)', eval: 'CC=clang-15 && CXX=clang++-15', }
- { name: 'Clang 16 (Ubuntu Noble - 24.04)', eval: 'CC=clang-16 && CXX=clang++-16', }
- { name: 'Clang 17 (Ubuntu Noble - 24.04)', eval: 'CC=clang-17 && CXX=clang++-17', }
- { name: 'Clang 18 (Ubuntu Noble - 24.04)', eval: 'CC=clang-18 && CXX=clang++-18', }
diff --git a/.gitmodules b/.gitmodules
index d8738f84253..8a35f2bff8c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,8 @@
[submodule "libs/EXTERNAL/libcatch2"]
path = libs/EXTERNAL/libcatch2
url = https://github.com/catchorg/Catch2.git
+
+# Fork of sockpp: its branch v1.0.0_no_complication_warnings carries compilation-warning fixes on top of upstream tag v1.0.0.
[submodule "libs/EXTERNAL/sockpp"]
path = libs/EXTERNAL/sockpp
- url = https://github.com/w0lek/sockpp.git # fork where in branch v1.0.0_no_complication_warnings there are compilation warnings fixes for upstream tag v1.0.0 of sockpp
+ url = https://github.com/w0lek/sockpp.git
diff --git a/.gitpod.Dockerfile b/.gitpod.Dockerfile
index b738f995a69..bb6524f7f53 100644
--- a/.gitpod.Dockerfile
+++ b/.gitpod.Dockerfile
@@ -30,6 +30,7 @@ RUN apt-get update \
python-lxml \
qt5-default \
wget \
+ default-jre \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
diff --git a/CHANGELOG.md b/CHANGELOG.md
index ba313c7720d..d86dda1c83d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -47,6 +47,64 @@ _The following are changes which have been implemented in the VTR master branch
### Removed
+
+## v9.0.0 - 2024-12-23
+
+### Added
+ * Support for Advanced Architectures:
+ * 3D FPGA and RAD architectures.
+ * Architectures with hard Networks-on-Chip (NoCs).
+ * Distinct horizontal and vertical channel widths and types.
+ * Diagonal routing wires and other complex wire shapes (L-shaped, T-shaped, ...).
+
+ * New Benchmark Suites:
+ * Koios: A deep-learning-focused benchmark suite with various design sizes.
+ * Hermes: Benchmarks utilizing hard NoCs.
+ * TitanNew: Large benchmarks targeting the Stratix 10 architecture.
+
+ * Commercial FPGAs Architecture Captures:
+ * Intel’s Stratix 10 FPGA architecture.
+ * AMD’s 7-series FPGA architecture.
+
+ * Parmys Logic Synthesis Flow:
+ * Better Verilog language coverage
+ * More efficient hard block mapping
+
+ * VPR Graphics Visualizations:
+ * New interface for improved usability and underlying graphics rewritten using EZGL/GTK to allow more UI widgets.
+ * Algorithm breakpoint visualizations for placement and routing algorithm debugging.
+ * User-guided (manual) placement optimization features.
+ * Enabled a live connection for client graphical application to VTR engines through sockets (server mode).
+ * Interactive timing path analysis (IPA) client using server mode.
+
+ * Performance Enhancements:
+ * Parallel router for faster inter-cluster routing or flat routing.
+
+ * Re-clustering API to modify packing decisions during the flow.
+ * Support for floorplanning and placement constraints.
+ * Unified intra- and inter-cluster (flat) routing.
+ * Comprehensive web-based VTR utilities and API documentation.
+
+### Changed
+ * The default values of many command line options (e.g. inner_num is 0.5 instead of 1.0)
+ * Changes to placement engine
+ * Smart centroid initial placement algorithm.
+ * Multiple smart placement directed moves.
+ * Reinforcement learning-based placement algorithm.
+ * Changes to routing engine
+ * Faster lookahead creation.
+ * More accurate lookahead for large blocks.
+ * More efficient heap and pruning strategies.
+ * max `pres_fac` capped to avoid possible numeric issues.
+
+
+### Fixed
+ * Many algorithmic and coding bugs are fixed in this release
+
+### Removed
+ * Breadth-first (non-timing-driven) router.
+ * Non-linear congestion placement cost.
+
## v8.0.0 - 2020-03-24
### Added
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3ef16e0bcfc..653c3f17173 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -61,10 +61,10 @@ option(ODIN_SANITIZE "Enable building odin with sanitize flags" OFF)
# Allow the user to enable building Yosys
option(WITH_PARMYS "Enable Yosys as elaborator and parmys-plugin as partial mapper" ON)
-option(YOSYS_F4PGA_PLUGINS "Enable building and installing Yosys SystemVerilog and UHDM plugins" OFF)
+option(SYNLIG_SYSTEMVERILOG "Enable building and installing Synlig SystemVerilog and UHDM plugins" OFF)
-set(VTR_VERSION_MAJOR 8)
-set(VTR_VERSION_MINOR 1)
+set(VTR_VERSION_MAJOR 9)
+set(VTR_VERSION_MINOR 0)
set(VTR_VERSION_PATCH 0)
set(VTR_VERSION_PRERELEASE "dev")
@@ -94,9 +94,9 @@ add_definitions("-DVTR_ASSERT_LEVEL=${VTR_ASSERT_LEVEL}")
include(CheckCXXCompilerFlag)
#
-# We require c++17 support
+# We require c++20 support
#
-set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF) #No compiler specific extensions
@@ -161,7 +161,7 @@ else()
"-Wcast-align" #Warn if a cast causes memory alignment changes
"-Wshadow" #Warn if local variable shadows another variable
"-Wformat=2" #Sanity checks for printf-like formatting
- "-Wno-format-nonliteral" # But don't worry about non-literal formtting (i.e. run-time printf format strings)
+ "-Wno-format-nonliteral" # But don't worry about non-literal formatting (i.e. run-time printf format strings)
"-Wlogical-op" #Checks for logical op when bit-wise expected
"-Wmissing-declarations" #Warn if a global function is defined with no declaration
"-Wmissing-include-dirs" #Warn if a user include directory is missing
@@ -179,10 +179,10 @@ else()
"-Wduplicated-cond" #Warn about identical conditions in if-else chains
"-Wduplicated-branches" #Warn when different branches of an if-else chain are equivalent
"-Wnull-dereference" #Warn about null pointer dereference execution paths
- "-Wuninitialized" #Warn about unitialized values
+ "-Wuninitialized" #Warn about uninitialized values
"-Winit-self" #Warn about self-initialization
"-Wcatch-value=3" #Warn when catch statements don't catch by reference
- "-Wextra-semi" #Warn about redudnant semicolons
+ "-Wextra-semi" #Warn about redundant semicolons
"-Wimplicit-fallthrough=3" #Warn about case fallthroughs, but allow 'fallthrough' comments to suppress warnings
#GCC-like optional
#"-Wsuggest-final-types" #Suggest where 'final' would help if specified on a type methods
@@ -454,7 +454,7 @@ if(${WITH_ODIN})
endif()
# handle cmake params to compile Yosys SystemVerilog/UHDM plugins
-if(${YOSYS_F4PGA_PLUGINS})
+if(${SYNLIG_SYSTEMVERILOG})
# avoid compiling plugins in case the Parmys frontend is not active
if(NOT ${WITH_PARMYS})
message(SEND_ERROR "Utilizing SystemVerilog/UHDM plugins requires activating Parmys frontend. Please set WITH_PARMYS.")
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f5030302bf9..2ebfababd8d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -77,7 +77,7 @@ This information helps us to quickly reproduce (and hopefully fix) the issue:
Tell us what version of VTR you are using (e.g. the output of `vpr --version`), which Operating System and compiler you are using, or any other relevant information about where or how you are building/running VTR.
-Once you've gathered all the information [open an Issue](https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/new?template=bug_report.md) on our issue tracker.
+Once you've gathered all the information [open an Issue](https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/new?template=bug_report.md) on our issue tracker. Issues that do not have any activity for a year will be automatically marked as stale and will be closed after 15 days of being marked as stale.
If you know how to fix the issue, or already have it coded-up, please also consider [submitting the fix](#submitting-code-to-vtr).
This is likely the fastest way to get bugs fixed!
diff --git a/Dockerfile b/Dockerfile
index 71f5129301a..2b36ac5c5e5 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -15,6 +15,7 @@ RUN apt-get update -qq \
&& apt-get -y install --no-install-recommends \
wget \
ninja-build \
+ default-jre \
libeigen3-dev \
libtbb-dev \
python3-pip \
diff --git a/Makefile b/Makefile
index 06d2df4c158..109288f0344 100644
--- a/Makefile
+++ b/Makefile
@@ -48,14 +48,14 @@ ifeq ($(VERBOSE),1)
override CMAKE_PARAMS := -DVTR_ENABLE_VERBOSE=on ${CMAKE_PARAMS}
endif
-# -s : Suppresss makefile output (e.g. entering/leaving directories)
+# -s : Suppresses makefile output (e.g. entering/leaving directories)
# --output-sync target : For parallel compilation ensure output for each target is synchronized (make version >= 4.0)
MAKEFLAGS := -s
SOURCE_DIR := $(PWD)
BUILD_DIR ?= build
-#Check for the cmake exectuable
+#Check for the cmake executable
CMAKE := $(shell command -v cmake 2> /dev/null)
#Show test log on failures with 'make test'
diff --git a/README.developers.md b/README.developers.md
index 709a256c58c..866f8ca1dac 100644
--- a/README.developers.md
+++ b/README.developers.md
@@ -301,10 +301,13 @@ For the very large runs, you can submit your runs on a large cluster. A template
a Slurm-managed cluster can be found under vtr_flow/tasks/slurm/
## Continuous integration (CI)
+
+### Automatic (GitHub runner) CI tests
+
For the following tests, you can use remote servers instead of running them locally. Once the changes are pushed into the
remote repository, or a PR is created, the [Test Workflow](https://github.com/verilog-to-routing/vtr-verilog-to-routing/blob/master/.github/workflows/test.yml)
will be triggered. Many tests are included in the workflow, including:
-* [vtr_reg_nightly_test1-N](#vtr_reg_nightly_test1-N)
+* [vtr_reg_nightly_test1-N](#vtr_reg_nightly_test1-n)
* [vtr_reg_strong](#vtr_reg_strong)
* [vtr_reg_basic](#vtr_reg_basic)
* odin_reg_strong
@@ -312,6 +315,15 @@ will be triggered. Many tests are included in the workflow, including:
instructions on how to gather QoR results of CI runs can be found [here](#example-extracting-qor-data-from-ci-runs).
+### Manual Nightly Tests
+
+You can use remote servers to run the [vtr_reg_nightly_test1-7](#vtr_reg_nightly_test1-n) tests. These tests are triggered manually by going to the GitHub Actions menu, selecting the NightlyTestManual workflow and selecting run workflow on the branch you want to test. Once you do that, the [Nightly Test Manual Workflow](https://github.com/verilog-to-routing/vtr-verilog-to-routing/blob/master/.github/workflows/nightly_test_manual.yml) will be triggered. This run will take approximately 15 hours to complete and will cancel all other workflow runs for the same branch.
+
+
+
+
+
+
#### Re-run CI Tests
In the case that you want to re-run the CI tests, due to certain issues such as infrastructure failure,
go to the "Action" tab and find your workflow under Test Workflow.
@@ -637,6 +649,10 @@ They can be used for FPGA architecture exploration for DL and also for tuning CA
A typical approach to evaluating an algorithm change would be to run `koios_medium` (or `koios_medium_no_hb`) tasks from the nightly regression test (vtr_reg_nightly_test4), the `koios_large` (or `koios_large_no_hb`) and the `koios_proxy` (or `koios_proxy_no_hb`) tasks from the weekly regression test (vtr_reg_weekly). The nightly test contains smaller benchmarks, whereas the large designs are in the weekly regression test. To measure QoR for the entire benchmark suite, both nightly and weekly tests should be run and the results should be concatenated.
+As 3 of the `koios_large` circuits require special settings due to having long DSP chains, they are split into separate tasks as follows:
+ * `bwave_like.float.large.v` and `bwave_like.fixed.large.v` are in `vtr_reg_weekly/koios_bwave_large` task
+ * `dla_like.large.v` is in `vtr_reg_weekly/koios_dla_large` task
+
For evaluating an algorithm change in the Odin frontend, run `koios_medium` (or `koios_medium_no_hb`) tasks from the nightly regression test (vtr_reg_nightly_test4_odin) and the `koios_large_odin` (or `koios_large_no_hb_odin`) tasks from the weekly regression test (vtr_reg_weekly).
The `koios_medium`, `koios_large`, and `koios_proxy` regression tasks run these benchmarks with complex_dsp functionality enabled, whereas `koios_medium_no_hb`, `koios_large_no_hb` and `koios_proxy_no_hb` regression tasks run these benchmarks without complex_dsp functionality. Normally, only the `koios_medium`, `koios_large`, and `koios_proxy` tasks should be enough for QoR.
@@ -651,6 +667,8 @@ The following table provides details on available Koios settings in VTR flow:
| Nightly | Medium designs | k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml | ✓ | vtr_reg_nightly_test4_odin/koios_medium | Odin | |
| Nightly | Medium designs | k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml | | vtr_reg_nightly_test4_odin/koios_medium_no_hb | Odin | |
| Weekly | Large designs | k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml | ✓ | vtr_reg_weekly/koios_large | Parmys | |
+| Weekly | Large designs | k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml | ✓ | vtr_reg_weekly/koios_dla_large | Parmys | |
+| Weekly | Large designs | k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml | ✓ | vtr_reg_weekly/koios_bwave_large | Parmys | |
| Weekly | Large designs | k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml | | vtr_reg_weekly/koios_large_no_hb | Parmys | |
| Weekly | Large designs | k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml | ✓ | vtr_reg_weekly/koios_large_odin | Odin | |
| Weekly | Large designs | k6FracN10LB_mem20K_complexDSP_customSB_22nm.xml | | vtr_reg_weekly/koios_large_no_hb_odin | Odin | |
@@ -661,7 +679,15 @@ The following table provides details on available Koios settings in VTR flow:
For more information refer to the [Koios benchmark home page](vtr_flow/benchmarks/verilog/koios/README.md).
-The following steps show a sequence of commands to run the `koios` tasks on the Koios benchmarks:
+To make running all the koios benchmarks easier, especially with those circuits scattered between different tasks, there is an overall task list that runs all the 40 circuits of Koios as follows (this will run all the circuits with complex DSP functionality enabled. If you want to disable the complex DSP, edit the file to point to the `koios_*_no_hb` tasks):
+
+```shell
+$ ../scripts/run_vtr_task.py -l koios_task_list.txt
+
+#Several hours later... they complete
+```
+
+If you want to run a subset of the koios benchmarks or run them without hard DSP blocks, you can run lower-level 'koios' tasks as follows:
```shell
#From the VTR root
@@ -681,17 +707,6 @@ $ ../scripts/run_vtr_task.py regression_tests/vtr_reg_weekly/koios_sv_no_hb &
#Several hours later... they complete
-#Parse the results
-$ ../scripts/python_libs/vtr/parse_vtr_task.py regression_tests/vtr_reg_nightly_test4/koios_medium
-$ ../scripts/python_libs/vtr/parse_vtr_task.py regression_tests/vtr_reg_weekly/koios_large
-$ ../scripts/python_libs/vtr/parse_vtr_task.py regression_tests/vtr_reg_weekly/koios_proxy
-$ ../scripts/python_libs/vtr/parse_vtr_task.py regression_tests/vtr_reg_weekly/koios_sv
-
-$ ../scripts/python_libs/vtr/parse_vtr_task.py regression_tests/vtr_reg_nightly_test4/koios_medium_no_hb
-$ ../scripts/python_libs/vtr/parse_vtr_task.py regression_tests/vtr_reg_weekly/koios_large_no_hb
-$ ../scripts/python_libs/vtr/parse_vtr_task.py regression_tests/vtr_reg_weekly/koios_proxy_no_hb
-$ ../scripts/python_libs/vtr/parse_vtr_task.py regression_tests/vtr_reg_weekly/koios_sv_no_hb
-
#The run directory should now contain a summary parse_results.txt file
$ head -5 vtr_reg_nightly_test4/koios_medium//parse_results.txt
arch circuit script_params vtr_flow_elapsed_time vtr_max_mem_stage vtr_max_mem error odin_synth_time max_odin_mem parmys_synth_time max_parmys_mem abc_depth abc_synth_time abc_cec_time abc_sec_time max_abc_mem ace_time max_ace_mem num_clb num_io num_memories num_mult vpr_status vpr_revision vpr_build_info vpr_compiler vpr_compiled hostname rundir max_vpr_mem num_primary_inputs num_primary_outputs num_pre_packed_nets num_pre_packed_blocks num_netlist_clocks num_post_packed_nets num_post_packed_blocks device_width device_height device_grid_tiles device_limiting_resources device_name pack_mem pack_time placed_wirelength_est place_mem place_time place_quench_time placed_CPD_est placed_setup_TNS_est placed_setup_WNS_est placed_geomean_nonvirtual_intradomain_critical_path_delay_est place_delay_matrix_lookup_time place_quench_timing_analysis_time place_quench_sta_time place_total_timing_analysis_time place_total_sta_time min_chan_width routed_wirelength min_chan_width_route_success_iteration logic_block_area_total logic_block_area_used min_chan_width_routing_area_total min_chan_width_routing_area_per_tile min_chan_width_route_time min_chan_width_total_timing_analysis_time min_chan_width_total_sta_time crit_path_routed_wirelength crit_path_route_success_iteration crit_path_total_nets_routed crit_path_total_connections_routed crit_path_total_heap_pushes crit_path_total_heap_pops critical_path_delay geomean_nonvirtual_intradomain_critical_path_delay setup_TNS setup_WNS hold_TNS hold_WNS crit_path_routing_area_total crit_path_routing_area_per_tile router_lookahead_computation_time crit_path_route_time crit_path_total_timing_analysis_time crit_path_total_sta_time
diff --git a/cmake/modules/AutoClangFormat.cmake b/cmake/modules/AutoClangFormat.cmake
index e4f6c3cb8a4..fb5e78d9f55 100644
--- a/cmake/modules/AutoClangFormat.cmake
+++ b/cmake/modules/AutoClangFormat.cmake
@@ -21,11 +21,11 @@ add_custom_target(format-cpp-files
COMMAND find ${DIRS_TO_FORMAT_CPP} ${FIND_TO_FORMAT_CPP})
#
-# Use clang-format-14 for code format
+# Use clang-format for code format
#
add_custom_target(format-cpp
COMMAND find ${DIRS_TO_FORMAT_CPP} ${FIND_TO_FORMAT_CPP} |
- xargs -P ${CPU_COUNT} clang-format-14 -style=file -i)
+ xargs -P ${CPU_COUNT} clang-format-18 -style=file -i)
#
# Use simple python script for fixing C like boxed comments
diff --git a/dev/pylint_check.py b/dev/pylint_check.py
index 96e78a2393d..4d2b9bba7c6 100755
--- a/dev/pylint_check.py
+++ b/dev/pylint_check.py
@@ -132,7 +132,7 @@ def expand_paths():
"""Build a list of all python files to process by going through 'paths_to_lint'"""
paths = []
- for (path, is_recursive) in paths_to_lint:
+ for path, is_recursive in paths_to_lint:
# Make sure all hard-coded paths point to .py files
if path.is_file():
if path.suffix.lower() != ".py":
diff --git a/dev/submit_slurm.py b/dev/submit_slurm.py
index fe6ef4b9ab5..f372f787e45 100755
--- a/dev/submit_slurm.py
+++ b/dev/submit_slurm.py
@@ -188,7 +188,7 @@ def get_resource_estimates(filepath):
mem_bytes = float(match.groupdict()["mem_bytes"])
time_minutes = time_sec / 60
- mem_mb = mem_bytes / (1024 ** 2)
+ mem_mb = mem_bytes / (1024**2)
return time_minutes, mem_mb
diff --git a/dev/vtr_gdb_pretty_printers.py b/dev/vtr_gdb_pretty_printers.py
index 86e2e14074d..3f4b7a951b3 100644
--- a/dev/vtr_gdb_pretty_printers.py
+++ b/dev/vtr_gdb_pretty_printers.py
@@ -7,8 +7,10 @@
https://docs.verilogtorouting.org/en/latest/dev/developing#vtr-pretty-printers
"""
+
import re
+
# VTR related
class VtrStrongIdPrinter:
def __init__(self, val, typename="vtr::StrongId"):
diff --git a/doc/_doxygen/vpr.dox b/doc/_doxygen/vpr.dox
index 5d2933b5295..79e58955469 100644
--- a/doc/_doxygen/vpr.dox
+++ b/doc/_doxygen/vpr.dox
@@ -6,7 +6,7 @@ EXTRACT_ALL = YES
EXTRACT_PRIVATE = YES
EXTRACT_STATIC = YES
WARN_IF_UNDOCUMENTED = NO
-INPUT = ../../vpr
+INPUT = ../../vpr ../../libs/libarchfpga/
RECURSIVE = YES
GENERATE_HTML = NO
GENERATE_LATEX = NO
diff --git a/doc/src/api/vpr/mapping.rst b/doc/src/api/vpr/mapping.rst
index ef573d0e973..f46436e1897 100644
--- a/doc/src/api/vpr/mapping.rst
+++ b/doc/src/api/vpr/mapping.rst
@@ -1,7 +1,7 @@
===============
Netlist mapping
===============
-As shown in the previous section, there are multiple levels of abstraction (multiple netlists) in VPR which are the ClusteredNetlist and the AtomNetlist. To fully use these netlists, we provide some functions to map between them.
+As shown in the previous section, there are multiple levels of abstraction (multiple netlists) in VPR which are the ClusteredNetlist and the AtomNetlist. To fully use these netlists, we provide some functions to map between them.
In this section, we will state how to map between the atom and clustered netlists.
@@ -16,11 +16,11 @@ To get the block Id of a cluster in the ClusteredNetlist from the block Id of on
.. code-block:: cpp
- ClusterBlockId clb_index = g_vpr_ctx.atom().lookup.atom_clb(atom_blk_id);
+ ClusterBlockId clb_index = g_vpr_ctx.atom().lookup().atom_clb(atom_blk_id);
* Using re_cluster_util.h helper functions
-
+
.. code-block:: cpp
ClusterBlockId clb_index = atom_to_cluster(atom_blk_id);
@@ -53,7 +53,7 @@ To get the net Id in the ClusteredNetlist from its Id in the AtomNetlist, use At
.. code-block:: cpp
- ClusterNetId clb_net = g_vpr_ctx.atom().lookup.clb_net(atom_net);
+ ClusterNetId clb_net = g_vpr_ctx.atom().lookup().clb_net(atom_net);
Cluster net Id to Atom net Id
@@ -62,4 +62,4 @@ To get the net Id in the AtomNetlist from its Id in the ClusteredNetlist, use At
.. code-block:: cpp
- ClusterNetId atom_net = g_vpr_ctx.atom().lookup.atom_net(clb_net);
+ ClusterNetId atom_net = g_vpr_ctx.atom().lookup().atom_net(clb_net);
diff --git a/doc/src/api/vtrutil/index.rst b/doc/src/api/vtrutil/index.rst
index 6d44b381a2a..84dc4ede829 100644
--- a/doc/src/api/vtrutil/index.rst
+++ b/doc/src/api/vtrutil/index.rst
@@ -11,4 +11,5 @@ VTRUTIL API
container_utils
logging
geometry
+ parallel
other
diff --git a/doc/src/api/vtrutil/parallel.rst b/doc/src/api/vtrutil/parallel.rst
new file mode 100644
index 00000000000..a8f9b9bbfa0
--- /dev/null
+++ b/doc/src/api/vtrutil/parallel.rst
@@ -0,0 +1,13 @@
+========
+Parallel
+========
+
+vtr_thread_pool
+---------------
+.. doxygenfile:: vtr_thread_pool.h
+ :project: vtr
+ :sections: briefdescription detaileddescription func innernamespace enum
+
+.. doxygenclass:: vtr::thread_pool
+ :project: vtr
+ :members:
diff --git a/doc/src/arch/reference.rst b/doc/src/arch/reference.rst
index b8fccc6579a..681a6607818 100644
--- a/doc/src/arch/reference.rst
+++ b/doc/src/arch/reference.rst
@@ -849,7 +849,7 @@ Each tile type is specified with the ```` tag withing the ```` tag.
Tile
~~~~
-.. arch:tag::
+.. arch:tag::
A tile refers to a placeable element within an FPGA architecture and describes its physical compositions on the grid.
The following attributes are applicable to each tile.
diff --git a/doc/src/dev/run_ci_manual/run_workflow.png b/doc/src/dev/run_ci_manual/run_workflow.png
new file mode 100644
index 00000000000..ba6c9a54afd
Binary files /dev/null and b/doc/src/dev/run_ci_manual/run_workflow.png differ
diff --git a/doc/src/dev/run_ci_manual/select_actions.png b/doc/src/dev/run_ci_manual/select_actions.png
new file mode 100644
index 00000000000..444ec464932
Binary files /dev/null and b/doc/src/dev/run_ci_manual/select_actions.png differ
diff --git a/doc/src/dev/run_ci_manual/select_workflow.png b/doc/src/dev/run_ci_manual/select_workflow.png
new file mode 100644
index 00000000000..42f8c5a8b36
Binary files /dev/null and b/doc/src/dev/run_ci_manual/select_workflow.png differ
diff --git a/doc/src/parmys/quickstart.rst b/doc/src/parmys/quickstart.rst
index eea5b3edb5b..11a8c527609 100644
--- a/doc/src/parmys/quickstart.rst
+++ b/doc/src/parmys/quickstart.rst
@@ -37,8 +37,8 @@ To build the VTR flow with the Parmys front-end you may use the VTR Makefile wra
.. note::
- Compiling the VTR flow with the ``-DYOSYS_F4PGA_PLUGINS=ON`` flag is required to build and install Yosys SystemVerilog and UHDM plugins.
- Using this compile flag, the `Yosys-F4PGA-Plugins `_ and `Surelog `_ repositories are cloned in the ``$VTR_ROOT/libs/EXTERNAL`` directory and then will be compiled and added as external plugins to the Parmys front-end.
+ Compiling the VTR flow with the ``-DSYNLIG_SYSTEMVERILOG=ON`` flag is required to build and install Synlig SystemVerilog and UHDM plugins.
+ Using this compile flag, the `Synlig_SystemVerilog `_ repository is cloned and installed in the ``$VTR_ROOT/build/bin`` directory and then will be compiled and added as an external tool to the Parmys front-end.
Basic Usage
-----------
diff --git a/doc/src/quickstart/index.rst b/doc/src/quickstart/index.rst
index 0621ae0cd81..242079bef99 100644
--- a/doc/src/quickstart/index.rst
+++ b/doc/src/quickstart/index.rst
@@ -24,12 +24,18 @@ If you cloned the repository, you will need to set up the git submodules (if you
> git submodule init
> git submodule update
-VTR requires several system packages and Python packages to build and run the flow. You can install the required system packages using the following command (this works on Ubuntu 18.04, 20.04 and 22.04, but you may require different packages on other Linux distributions). Our CI testing is on Ubuntu 22.04, so that is the best tested platform and recommended for development.
+VTR requires several system packages and Python packages to build and run the flow. Ubuntu users can install the required system packages using the following command (this works on Ubuntu 18.04, 20.04, 22.04 and 24.04, but you may require different packages on other Linux distributions). Our CI testing is on Ubuntu 24.04, so that is the best tested platform and recommended for development.
.. code-block:: bash
> ./install_apt_packages.sh
+Fedora and RHEL users may use the following command to install the required system packages.
+
+.. code-block:: bash
+
+ > ./install_dnf_packages.sh
+
Then, to install the required Python packages (optionally within a new Python virtual environment):
.. code-block:: bash
diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst
index c33d4352fef..f21ee85f1eb 100644
--- a/doc/src/vpr/command_line_usage.rst
+++ b/doc/src/vpr/command_line_usage.rst
@@ -89,6 +89,8 @@ VPR runs all stages of (pack, place, route, and analysis) if none of :option:`--
as such, the :option:`--pack` and :option:`--place` options should not be set when this option is set.
This flow requires that the device has a fixed size and some of the primitive blocks are fixed somewhere on the device grid.
+ .. seealso:: See :ref:`analytical_placement_options` for the options for this flow.
+
.. seealso:: See :ref:`Fixed FPGA Grid Layout ` and :option:`--device` for how to fix the device size.
.. seealso:: See :ref:`VPR Placement Constraints ` for how to fix primitive blocks in a design to the device grid.
@@ -377,7 +379,15 @@ Use the options below to override this default naming behaviour.
.. seealso:: :ref:`Routing Resource XML File `.
-.. option:: --read_vpr_constraints ::...:
+.. option:: --read_rr_edge_override
+
+ Reads a file that overrides the intrinsic delay of specific edges in RR graph.
+
+ This option should be used with both :option:`--read_rr_graph` and :option:`--write_rr_graph`. When used this way,
+ VPR reads the RR graph, updates the delays of selected edges using :option:`--read_rr_edge_override`,
+ and writes the updated RR graph. The modified RR graph can then be used in later VPR runs.
+
+.. option:: --read_vpr_constraints
Reads the :ref:`VPR constraints ` that the flow must respect from the specified XML file.
@@ -408,6 +418,50 @@ Use the options below to override this default naming behaviour.
Prefix for output files
+.. option:: --read_flat_place
+
+ Reads a file containing the locations of each atom on the FPGA.
+ This is used by the packer to better cluster atoms together.
+
+ The flat placement file (which often ends in ``.fplace``) is a text file
+ where each line describes the location of an atom. Each line in the flat
+ placement file should have the following syntax:
+
+ .. code-block:: none
+
+ <atom_name> <x> <y> <layer> <sub_tile> <site_idx>
+
+ For example:
+
+ .. code-block:: none
+
+ n523 6 8 0 0 3
+ n522 6 8 0 0 5
+ n520 6 8 0 0 2
+ n518 6 8 0 0 16
+
+ The position of the atom on the FPGA is given by 3 floating point values
+ (``x``, ``y``, ``layer``). We allow for the positions of atoms to be not
+ quite legal (ok to be off-grid) since this flat placement will be fed into
+ the packer and placer, which will snap the positions to grid locations. By
+ allowing for off-grid positions, the packer can better trade-off where to
+ move atom blocks if they cannot be placed at the given position.
+ For 2D FPGA architectures, the ``layer`` should be 0.
+
+ The ``sub_tile`` is a clustered placement construct: which cluster-level
+ location at a given (x, y, layer) should these atoms go at (relevant when
+ multiple clusters can be stacked there). A sub-tile of -1 may be used when
+ the sub-tile of an atom is unknown (allowing the packing algorithm to choose
+ any sub-tile at the given (x, y, layer) location).
+
+ The ``site_idx`` is an optional index into a linearized list of primitive
+ locations within a cluster-level block which may be used as a hint to
+ reconstruct clusters.
+
+ .. warning::
+
+ This interface is currently experimental and under active development.
+
.. option:: --write_flat_place
Writes the post-placement locations of each atom into a flat placement file.
@@ -611,7 +665,7 @@ For people not working on CAD, you can probably leave all the options to their d
.. note::
- If a pin utilization target is unspecified it defaults to 1.0 (i.e. 100% utilization).
+ If some pin utilizations are specified, ``auto`` mode is turned off and the utilization target for any unspecified pin types defaults to 1.0 (i.e. 100% utilization).
For example:
@@ -830,55 +884,9 @@ If any of init_t, exit_t or alpha_t is specified, the user schedule, with a fixe
**Default:** ``0.0``
-.. _dusty_sa_options:
-Setting any of the following 5 options selects :ref:`Dusty's annealing schedule ` .
-
-.. option:: --alpha_min
-
- The minimum (starting) update factor (alpha) used.
- Ranges between 0 and alpha_max.
-
- **Default:** ``0.2``
-
-.. option:: --alpha_max
-
- The maximum (stopping) update factor (alpha) used after which simulated annealing will complete.
- Ranges between alpha_min and 1.
-
- **Default:** ``0.9``
-
-.. option:: --alpha_decay
-
- The rate at which alpha will approach 1: alpha(n) = 1 - (1 - alpha(n-1)) * alpha_decay
- Ranges between 0 and 1.
-
- **Default:** ``0.7``
-
-.. option:: --anneal_success_min
-
- The minimum success ratio after which the temperature will reset to maintain the target success ratio.
- Ranges between 0 and anneal_success_target.
-
- **Default:** ``0.1``
-
-.. option:: --anneal_success_target
-
- The temperature after each reset is selected to keep this target success ratio.
- Ranges between anneal_success_target and 1.
-
- **Default:** ``0.25``
-
-.. option:: --place_cost_exp
-
- Wiring cost is divided by the average channel width over a net's bounding box
- taken to this exponent. Only impacts devices with different channel widths in
- different directions or regions.
-
- **Default:** ``1``
-
.. option:: --RL_agent_placement {on | off}
- Uses a Reinforcement Learning (RL) agent in choosing the appropiate move type in placement.
+ Uses a Reinforcement Learning (RL) agent in choosing the appropriate move type in placement.
It activates the RL agent placement instead of using a fixed probability for each move type.
**Default:** ``on``
@@ -907,7 +915,7 @@ Setting any of the following 5 options selects :ref:`Dusty's annealing schedule
Controls how quickly the agent's memory decays. Values between [0., 1.] specify
the fraction of weight in the exponentially weighted reward average applied to moves
- which occured greater than moves_per_temp moves ago. Values < 0 cause the
+ which occurred greater than moves_per_temp moves ago. Values < 0 cause the
unweighted reward sample average to be used (all samples are weighted equally)
**Default:** ``0.05``
@@ -926,6 +934,15 @@ Setting any of the following 5 options selects :ref:`Dusty's annealing schedule
**Default:** ``move_block_type``
+.. option:: --place_quench_only {on | off}
+
+ If this option is set to ``on``, the placement will skip the annealing phase and only perform the placement quench.
+ This option is useful when the quality of initial placement is good enough and there is no need to perform the
+ annealing phase.
+
+ **Default:** ``off``
+
+
.. option:: --placer_debug_block
.. note:: This option is likely only of interest to developers debugging the placement algorithm
@@ -1023,7 +1040,7 @@ The following options are only valid when the placement engine is in timing-driv
.. option:: --place_delay_model_reducer {min, max, median, arithmean, geomean}
- When calculating delta delays for the placment delay model how are multiple values combined?
+ When calculating delta delays for the placement delay model how are multiple values combined?
**Default:** ``min``
@@ -1056,7 +1073,7 @@ The following options are only valid when the placement engine is in timing-driv
.. option:: --place_tsu_abs_margin
- Specifies an absolute offest added to cell setup times used by the placer.
+ Specifies an absolute offset added to cell setup times used by the placer.
This effectively controls whether the placer should try to achieve extra margin on setup paths.
For example a value of 500e-12 corresponds to requesting an extra 500ps of setup margin.
@@ -1064,7 +1081,7 @@ The following options are only valid when the placement engine is in timing-driv
.. option:: --post_place_timing_report
- Name of the post-placement timing report file to generate (not generated if unspecfied).
+ Name of the post-placement timing report file to generate (not generated if unspecified).
.. _noc_placement_options:
@@ -1163,6 +1180,114 @@ The following options are only used when FPGA device and netlist contain a NoC r
**Default:** ``vpr_noc_placement_output.txt``
+
+.. _analytical_placement_options:
+
+Analytical Placement Options
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Instead of Packing atoms into clusters and placing the clusters into valid tile
+sites on the FPGA, Analytical Placement uses analytical techniques to place atoms
+on the FPGA device by relaxing the constraints on where they can be placed. This
+atom-level placement is then legalized into a clustered placement and passed into
+the router in VPR.
+
+Analytical Placement is generally split into three stages:
+
+* Global Placement: Uses analytical techniques to place atoms on the FPGA grid.
+
+* Full Legalization: Legalizes a flat (atom) placement into legal clusters placed on the FPGA grid.
+
+* Detailed Placement: While keeping the clusters legal, performs optimizations on the clustered placement.
+
+.. warning::
+
+ Analytical Placement is experimental and under active development.
+
+.. option:: --ap_analytical_solver {qp-hybrid | lp-b2b}
+
+ Controls which Analytical Solver the Global Placer will use in the AP Flow.
+ The Analytical Solver solves for a placement which optimizes some objective
+ function, ignorant of the FPGA legality constraints. This provides a
+ "lower-bound" solution. The Global Placer will legalize this solution and feed it
+ back to the analytical solver to make its solution more legal.
+
+ * ``qp-hybrid`` Solves for a placement that minimizes the quadratic HPWL of
+ the flat placement using a hybrid clique/star net model (as described in
+ FastPlace :cite:`Viswanathan2005_FastPlace`).
+ Uses the legalized solution as anchor-points to pull the solution to a
+ more legal solution (similar to the approach from SimPL :cite:`Kim2013_SimPL`).
+
+ * ``lp-b2b`` Solves for a placement that minimizes the linear HPWL of the
+ flat placement using the Bound2Bound net model (as described in Kraftwerk2 :cite:`Spindler2008_Kraftwerk2`).
+ Uses the legalized solution as anchor-points to pull the solution to a
+ more legal solution (similar to the approach from SimPL :cite:`Kim2013_SimPL`).
+
+ **Default:** ``lp-b2b``
+
+.. option:: --ap_partial_legalizer {bipartitioning | flow-based}
+
+ Controls which Partial Legalizer the Global Placer will use in the AP Flow.
+ The Partial Legalizer legalizes a placement generated by an Analytical Solver.
+ It is used within the Global Placer to guide the solver to a more legal
+ solution.
+
+ * ``bipartitioning`` Creates minimum windows around over-dense regions of
+ the device and bi-partitions the atoms in these windows such that the region
+ is no longer over-dense and the atoms are in tiles that they can be placed
+ into.
+
+ * ``flow-based`` Flows atoms from regions that are overfilled to regions that
+ are underfilled.
+
+ **Default:** ``bipartitioning``
+
+.. option:: --ap_full_legalizer {naive | appack}
+
+ Controls which Full Legalizer to use in the AP Flow.
+
+ * ``naive`` Use a Naive Full Legalizer which will try to create clusters exactly where their atoms are placed.
+
+ * ``appack`` Use APPack, which takes the Packer in VPR and uses the flat atom placement to create better clusters.
+
+ **Default:** ``appack``
+
+.. option:: --ap_detailed_placer {none | annealer}
+
+ Controls which Detailed Placer to use in the AP Flow.
+
+ * ``none`` Do not use any Detailed Placer.
+
+ * ``annealer`` Use the Annealer from the Placement stage as a Detailed Placer. This will use the same Placer Options from the Place stage to configure the annealer.
+
+ **Default:** ``annealer``
+
+.. option:: --ap_timing_tradeoff
+
+ Controls the trade-off between wirelength (HPWL) and delay minimization in the AP flow.
+
+ A value of 0.0 makes the AP flow focus completely on wirelength minimization,
+ while a value of 1.0 makes the AP flow focus completely on timing optimization.
+
+ **Default:** ``0.5``
+
+.. option:: --ap_verbosity
+
+ Controls the verbosity of the AP flow output.
+ Larger values produce more detailed output, which may be useful for
+ debugging the algorithms in the AP flow.
+
+ * ``1 <= verbosity < 10`` Print standard, stage-level messages. This will
+ print messages at the GP, FL, or DP level.
+
+ * ``10 <= verbosity < 20`` Print more detailed messages of what is happening
+ within stages. For example, show high-level information on the legalization
+ iterations within the Global Placer.
+
+ * ``20 <= verbosity`` Print very detailed messages on intra-stage algorithms.
+
+ **Default:** ``1``
+
+
.. _router_options:
Router Options
@@ -1179,7 +1304,7 @@ VPR uses a negotiated congestion algorithm (based on Pathfinder) to perform rout
This means that during the routing stage, all nets, both intra- and inter-cluster, are routed directly from one primitive pin to another primitive pin.
This increases routing time but can improve routing quality by re-arranging LUT inputs and exposing additional optimization opportunities in architectures with local intra-cluster routing that is not a full crossbar.
- **Default:** ``OFF`
+ **Default:** ``off``
.. option:: --max_router_iterations
diff --git a/doc/src/vpr/file_formats.rst b/doc/src/vpr/file_formats.rst
index dc76e2ff5ea..32fbb0dfc93 100644
--- a/doc/src/vpr/file_formats.rst
+++ b/doc/src/vpr/file_formats.rst
@@ -1100,6 +1100,28 @@ To aid in handling large graphs, rr_graph files can also be :ref:`saved in
-
+
@@ -78,7 +78,10 @@ An ```` tag is used to add an atom that must be constrained to the par
:req_param name_pattern:
The name of the atom.
-The ``name_pattern`` can be the exact name of the atom from the input atom netlist that was passed to VPR. It can also be a regular expression, in which case VPR will add all atoms from the netlist which have a portion of their name matching the regular expression to the partition. For example, if a module contains primitives named in the pattern of "alu[0]", "alu[1]", and "alu[2]", the regular expression "alu*" would add all of the primitives from that module.
+The ``name_pattern`` can either be the exact name of an atom from the input atom netlist passed to VPR, or a regular expression pattern matching one or more atom names. VPR first searches the netlist for an exact match. If no exact match is found, it then assumes that the given name is a regex pattern and searches for atoms whose names match the pattern.
+
+For example, to add all atoms ``alu[0]``, ``alu[1]``, and ``alu[2]`` to the partition ``Part0``, the user can use ``alu.*`` as the ``name_pattern`` in the ``<add_atom>`` tag.
+
Region
^^^^^^
@@ -124,12 +127,3 @@ It is strongly recommended that different partitions do not overlap. The packing
blocks and the number of physical blocks in a region to decide pack atoms inside a partition more aggressively when
there are not enough resources in a partition. Overlapping partitions causes some physical blocks to be counted in more
than one partition.
-
-
-
-
-
-
-
-
-
diff --git a/doc/src/vtr/run_vtr_flow.rst b/doc/src/vtr/run_vtr_flow.rst
index 97782ec7a77..f61bdf3226e 100644
--- a/doc/src/vtr/run_vtr_flow.rst
+++ b/doc/src/vtr/run_vtr_flow.rst
@@ -73,15 +73,15 @@ The parser for these runs is considered the Yosys conventional Verilog/SystemVer
.. code-block:: bash
- # Using the Yosys-SystemVerilog plugin if installed, otherwise the Yosys conventional Verilog parser
+ # Using the Synlig System_Verilog tool if installed, otherwise the Yosys conventional Verilog parser
./run_vtr_flow -parser system-verilog
# Using the Surelog plugin if installed, otherwise failure on the unsupported file type
./run_vtr_flow -parser surelog
Running the default VTR flow using the Parmys standalone front-end.
-The Yosys HDL parser is considered as Yosys-SystemVerilog plugin (i.e., ``read_systemverilog``) and Yosys UHDM plugin (i.e., ``read_uhdm``), respectively.
-Utilizing Yosys plugins requires passing the ``-DYOSYS_F4PGA_PLUGINS=ON`` compile flag to build and install the plugins for the Parmys front-end.
+The Synlig HDL parser supports the ``read_systemverilog`` and ``read_uhdm`` commands. It utilizes Surelog for SystemVerilog 2017 processing and Yosys for synthesis.
+Enable the Synlig tool with the ``-DSYNLIG_SYSTEMVERILOG=ON`` compile flag for the Parmys front-end.
.. code-block:: bash
diff --git a/doc/src/z_references.bib b/doc/src/z_references.bib
index 8822890713d..fc064f2c433 100644
--- a/doc/src/z_references.bib
+++ b/doc/src/z_references.bib
@@ -436,3 +436,46 @@ @inproceedings{kosar2024parallel
booktitle={The 23rd International Conference on Field-Programmable Technology},
year={2024}
}
+
+@ARTICLE{Viswanathan2005_FastPlace,
+ author={Viswanathan, N. and Chu, C.C.-N.},
+ journal={IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
+ title={{FastPlace}: efficient analytical placement using cell shifting, iterative local refinement, and a hybrid net model},
+ year={2005},
+ volume={24},
+ number={5},
+ month=may,
+ pages={722-733},
+ keywords={Clustering algorithms;Partitioning algorithms;Algorithm design and analysis;Integrated circuit interconnections;Large-scale systems;Minimization;Delay;Simulated annealing;Iterative algorithms;Acceleration;Analytical placement;computer-aided design;net models;standard cell placement},
+ doi={10.1109/TCAD.2005.846365}
+}
+
+@article{Kim2013_SimPL,
+ author = {Kim, Myung-Chul and Lee, Dong-Jin and Markov, Igor L.},
+ journal = {Commun. ACM},
+ title = {{SimPL}: an algorithm for placing {VLSI} circuits},
+ year = {2013},
+ issue_date = {June 2013},
+ publisher = {Association for Computing Machinery},
+ address = {New York, NY, USA},
+ volume = {56},
+ number = {6},
+ issn = {0001-0782},
+ doi = {10.1145/2461256.2461279},
+ month = jun,
+ pages = {105--113},
+ numpages = {9}
+}
+
+@ARTICLE{Spindler2008_Kraftwerk2,
+ author={Spindler, Peter and Schlichtmann, Ulf and Johannes, Frank M.},
+ journal={IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
+ title={Kraftwerk2—A Fast Force-Directed Quadratic Placement Approach Using an Accurate Net Model},
+ year={2008},
+ volume={27},
+ number={8},
+ month=aug,
+ pages={1398-1411},
+ keywords={Cost function;Central Processing Unit;Runtime;Quality control;Convergence;Computational efficiency;Integrated circuit synthesis;Stochastic processes;Circuit simulation;Bound2Bound;force-directed;half-perimeter wirelength (HPWL);Kraftwerk2;quadratic placement;Kraftwerk2;force-directed;quadratic placement;Bound2Bound;HPWL},
+ doi={10.1109/TCAD.2008.925783}
+}
diff --git a/install_apt_packages.sh b/install_apt_packages.sh
index 17d9aa49425..ede554a197c 100755
--- a/install_apt_packages.sh
+++ b/install_apt_packages.sh
@@ -36,6 +36,7 @@ sudo apt-get install -y \
libboost-system-dev \
libboost-python-dev \
libboost-filesystem-dev \
+ default-jre \
zlib1g-dev
# Required to build the documentation
diff --git a/install_dnf_packages.sh b/install_dnf_packages.sh
new file mode 100755
index 00000000000..b2a23fb7da5
--- /dev/null
+++ b/install_dnf_packages.sh
@@ -0,0 +1,52 @@
+sudo dnf upgrade --refresh
+
+# Base packages to compile and run basic regression tests
+sudo dnf install -y \
+ make \
+ cmake \
+ automake \
+ gcc \
+ gcc-c++ \
+ kernel-devel \
+ pkg-config \
+ bison \
+ flex \
+ python3-devel \
+ tbb-devel
+# Required for graphics
+sudo dnf install -y \
+ gtk3-devel \
+ libX11
+
+# Required for parmys front-end from https://github.com/YosysHQ/yosys
+sudo dnf install -y \
+ make \
+ automake \
+ gcc \
+ gcc-c++ \
+ kernel-devel \
+ clang \
+ bison \
+ flex \
+ readline-devel \
+ gawk \
+ tcl-devel \
+ libffi-devel \
+ git \
+ graphviz \
+ python-xdot \
+ pkg-config \
+ python3-devel \
+ boost-system \
+ boost-python3 \
+ boost-filesystem \
+ zlib-ng-devel
+
+# Required to build the documentation
+sudo dnf install -y \
+ python3-sphinx \
+ python-sphinx-doc
+
+# Required to run the analytical placement flow
+sudo dnf install -y \
+ eigen3-devel
\ No newline at end of file
diff --git a/libs/EXTERNAL/CMakeLists.txt b/libs/EXTERNAL/CMakeLists.txt
index a60c48958eb..f70f150fbe2 100644
--- a/libs/EXTERNAL/CMakeLists.txt
+++ b/libs/EXTERNAL/CMakeLists.txt
@@ -1,4 +1,5 @@
include(ExternalProject)
+include(ProcessorCount)
#Manually synchronized external libraries
add_subdirectory(libpugixml)
@@ -9,8 +10,17 @@ add_subdirectory(libsdcparse)
add_subdirectory(libblifparse)
add_subdirectory(libtatum)
add_subdirectory(libcatch2)
+#add_subdirectory(synlig)
#add_subdirectory(parmys)
+#Proc numbers
+ProcessorCount(PROCESSOR_COUNT)
+
+if(PROCESSOR_COUNT EQUAL 0)
+ # Fallback to 1 if the processor count cannot be determined
+ set(PROCESSOR_COUNT 1)
+endif()
+
#VPR_USE_SERVER is initialized in the root CMakeLists
#compile sockpp only if server mode is enabled
if (VPR_USE_SERVER)
@@ -27,8 +37,9 @@ endif ()
# The VTR root CMakeFile initializes the WITH_PARMYS
if (${WITH_PARMYS})
- # building Surelog and UHDM in a custom target to avoid any target conflict with VTR targets
- if (${YOSYS_F4PGA_PLUGINS})
+
+ if (${SYNLIG_SYSTEMVERILOG})
+
set(SURELOG_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/Surelog)
set(SURELOG_BINARY_DIR ${SURELOG_SOURCE_DIR}/build)
set(YOSYS_F4PGA_PLUGINS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/yosys-f4pga-plugins)
@@ -62,74 +73,90 @@ if (${WITH_PARMYS})
LOG_OUTPUT_ON_FAILURE ON
# dependency
- DEPENDS yosys
+ DEPENDS yosys
)
- ExternalProject_Add(f4pga-plugins
- # root directory for the Yosys-F4PGA-Plugins project
- PREFIX "f4pga-plugins"
-
- GIT_REPOSITORY https://github.com/chipsalliance/yosys-f4pga-plugins.git
- GIT_TAG v1.20230808
- GIT_PROGRESS TRUE
- GIT_SHALLOW TRUE
-
- # setting source, build and install directories
- SOURCE_DIR "${YOSYS_F4PGA_PLUGINS_SOURCE_DIR}"
- BUILD_IN_SOURCE FALSE
- INSTALL_DIR ""
-
- INSTALL_COMMAND ""
- CONFIGURE_COMMAND ""
- BUILD_COMMAND ${MAKE_PROGRAM} -C ${YOSYS_F4PGA_PLUGINS_SOURCE_DIR}
- PATH=${CMAKE_BINARY_DIR}/bin/:$ENV{PATH}
- UHDM_INSTALL_DIR=${CMAKE_BINARY_DIR}
- BUILD_DIR=${CMAKE_CURRENT_BINARY_DIR}/f4pga-plugins/$(NAME) # NAME will be resolved by yosys-f4pga-plugins Make
- YOSYS_PATH=${CMAKE_BINARY_DIR}
- install -j${CUSTOM_BUILD_PARALLEL_LEVEL}
-
- # redirect logs to a logfile
- LOG_BUILD ON
- LOG_UPDATE ON
- LOG_INSTALL ON
- LOG_CONFIGURE OFF
- LOG_OUTPUT_ON_FAILURE ON
+ # Synlig integration (manages Surelog and UHDM internally)
+
+ # Synlig integration (manages Surelog and UHDM internally)
+ set(SYNLIG_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/synlig)
+
+ # Clone Synlig repository and ensure submodules are synced before building
+ ExternalProject_Add(synlig
+ PREFIX "synlig"
+
+ # Clone the Synlig repository
+ GIT_REPOSITORY https://github.com/chipsalliance/synlig.git
+ GIT_TAG main
+ GIT_PROGRESS TRUE
+ GIT_SHALLOW TRUE
+
+ # Set source and build directories
+ SOURCE_DIR "${SYNLIG_SOURCE_DIR}"
+ BUILD_IN_SOURCE FALSE
+ INSTALL_DIR " "
+
+ # Sync submodules after cloning
+
+
+ UPDATE_COMMAND git submodule update --init --recursive third_party/surelog
+ WORKING_DIRECTORY ${SYNLIG_SOURCE_DIR}
+
+ BUILD_COMMAND ${MAKE_PROGRAM} -C ${SYNLIG_SOURCE_DIR} install DESTDIR=${CMAKE_BINARY_DIR}/bin/synlig_install -j${PROCESSOR_COUNT}
+ INSTALL_COMMAND ""
+ CONFIGURE_COMMAND ""
+
+ # Pass necessary paths and set environment variables
+ CMAKE_CACHE_ARGS
+ "-DCMAKE_BUILD_TYPE:STRING=Release"
+ "-DSURELOG_PATH=${CMAKE_BINARY_DIR}/surelog"
+ "-DYOSYS_PATH=${CMAKE_BINARY_DIR}/yosys"
+ "-DUHDM_INSTALL_DIR=${CMAKE_BINARY_DIR}"
+ "-DBUILD_DIR=${CMAKE_BINARY_DIR}/synlig-build"
+ "-DEXPORT_PATH=${CMAKE_BINARY_DIR}/synlig_install/usr/local/bin:$ENV{PATH}"
+
+ LOG_BUILD ON
+ LOG_UPDATE ON
+ LOG_INSTALL ON
+ LOG_CONFIGURE OFF
+ LOG_OUTPUT_ON_FAILURE ON
+
+ # Ensure dependencies like Yosys are built first
+ DEPENDS yosys # Ensure submodule sync runs before synlig build
+ )
- # dependency
- DEPENDS surelog yosys
- )
endif ()
-
endif ()
-if (${VTR_ENABLE_CAPNPROTO})
- # Override default policy for capnproto (CMake policy version 3.1)
- # Enable new IPO variables
- set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
- # Enable option overrides via variables
- set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
+ if (${VTR_ENABLE_CAPNPROTO})
+ # Override default policy for capnproto (CMake policy version 3.1)
+ # Enable new IPO variables
+ set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)
- # Re-enable CXX extensions for capnproto.
- set(CMAKE_CXX_EXTENSIONS ON)
+ # Enable option overrides via variables
+ set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
- # Disable capnproto tests
- set(BUILD_TESTING OFF)
+ # Re-enable CXX extensions for capnproto.
+ set(CMAKE_CXX_EXTENSIONS ON)
- #Since capnproto is an externally developed library/tool, we suppress all compiler warnings
- CHECK_CXX_COMPILER_FLAG("-w" CXX_COMPILER_SUPPORTS_-w)
- if (CXX_COMPILER_SUPPORTS_-w)
- add_compile_options("-w")
- endif ()
+ # Disable capnproto tests
+ set(BUILD_TESTING OFF)
- add_subdirectory(capnproto EXCLUDE_FROM_ALL)
+ #Since capnproto is an externally developed library/tool, we suppress all compiler warnings
+ CHECK_CXX_COMPILER_FLAG("-w" CXX_COMPILER_SUPPORTS_-w)
+ if (CXX_COMPILER_SUPPORTS_-w)
+ add_compile_options("-w")
+ endif ()
- #Some capnproto kj headers (e.g. filesystem.h) generate warnings, treat them as system headers to suppress warnings
- #We suppress them here since we include the capnproto sub-tree as is and do not modify its CMakeLists.txts
- target_include_directories(kj SYSTEM INTERFACE
- $
- $
- )
+ add_subdirectory(capnproto EXCLUDE_FROM_ALL)
+
+ #Some capnproto kj headers (e.g. filesystem.h) generate warnings, treat them as system headers to suppress warnings
+ #We suppress them here since we include the capnproto sub-tree as is and do not modify its CMakeLists.txts
+ target_include_directories(kj SYSTEM INTERFACE
+ $
+ $
+ )
endif ()
# Some catch2 headers generate warnings, so treat them as system headers to suppress warnings
diff --git a/libs/EXTERNAL/libcatch2 b/libs/EXTERNAL/libcatch2
index fa43b77429b..76f70b1403d 160000
--- a/libs/EXTERNAL/libcatch2
+++ b/libs/EXTERNAL/libcatch2
@@ -1 +1 @@
-Subproject commit fa43b77429ba76c462b1898d6cd2f2d7a9416b14
+Subproject commit 76f70b1403dbc0781216f49e20e45b71f7eccdd8
diff --git a/libs/EXTERNAL/libezgl/include/ezgl/point.hpp b/libs/EXTERNAL/libezgl/include/ezgl/point.hpp
index ee8d5d17b90..a0d39279f19 100644
--- a/libs/EXTERNAL/libezgl/include/ezgl/point.hpp
+++ b/libs/EXTERNAL/libezgl/include/ezgl/point.hpp
@@ -36,7 +36,7 @@ class point2d {
/**
* Create a point at the given x and y position.
*/
- point2d(double x_coord, double y_coord) : x(x_coord), y(y_coord)
+ point2d(double x_coord, double y_coord) noexcept : x(x_coord), y(y_coord)
{
}
diff --git a/libs/EXTERNAL/libezgl/include/ezgl/rectangle.hpp b/libs/EXTERNAL/libezgl/include/ezgl/rectangle.hpp
index 70138eae392..a454e7f107c 100644
--- a/libs/EXTERNAL/libezgl/include/ezgl/rectangle.hpp
+++ b/libs/EXTERNAL/libezgl/include/ezgl/rectangle.hpp
@@ -33,7 +33,7 @@ class rectangle {
/**
* Default constructor: Create a zero-sized rectangle at {0,0}.
*/
- rectangle() : m_first({0, 0}), m_second({0, 0})
+ rectangle() noexcept : m_first({0, 0}), m_second({0, 0})
{
}
diff --git a/libs/EXTERNAL/libtatum/libtatum/tatum/util/tatum_strong_id.hpp b/libs/EXTERNAL/libtatum/libtatum/tatum/util/tatum_strong_id.hpp
index 71f5b7a874e..28340d53885 100644
--- a/libs/EXTERNAL/libtatum/libtatum/tatum/util/tatum_strong_id.hpp
+++ b/libs/EXTERNAL/libtatum/libtatum/tatum/util/tatum_strong_id.hpp
@@ -161,7 +161,7 @@ template
bool operator!=(const StrongId& lhs, const StrongId& rhs);
template
-bool operator<(const StrongId& lhs, const StrongId& rhs);
+bool operator<(const StrongId& lhs, const StrongId& rhs) noexcept;
//Class template definition with default template parameters
@@ -198,7 +198,7 @@ class StrongId {
// after the function name (i.e. <>)
friend bool operator== <>(const StrongId& lhs, const StrongId& rhs);
friend bool operator!= <>(const StrongId& lhs, const StrongId& rhs);
- friend bool operator< <>(const StrongId& lhs, const StrongId& rhs);
+ friend bool operator< <>(const StrongId& lhs, const StrongId& rhs) noexcept;
private:
T id_;
};
@@ -215,7 +215,7 @@ bool operator!=(const StrongId& lhs, const StrongId
-bool operator<(const StrongId& lhs, const StrongId& rhs) {
+bool operator<(const StrongId& lhs, const StrongId& rhs) noexcept {
return lhs.id_ < rhs.id_;
}
diff --git a/libs/EXTERNAL/sockpp b/libs/EXTERNAL/sockpp
index 5388c4b5659..599f750c8b6 160000
--- a/libs/EXTERNAL/sockpp
+++ b/libs/EXTERNAL/sockpp
@@ -1 +1 @@
-Subproject commit 5388c4b5659e99a86bc906dd6ac2eef66f1dd51e
+Subproject commit 599f750c8b6532950d4bb43b2b756700e41dbae9
diff --git a/libs/libarchfpga/CMakeLists.txt b/libs/libarchfpga/CMakeLists.txt
index 65e9096cd37..283ddb55959 100644
--- a/libs/libarchfpga/CMakeLists.txt
+++ b/libs/libarchfpga/CMakeLists.txt
@@ -26,6 +26,8 @@ target_link_libraries(libarchfpga
if(${VTR_ENABLE_CAPNPROTO})
target_link_libraries(libarchfpga libvtrcapnproto)
+ find_package(ZLIB REQUIRED)
+ target_link_libraries(libarchfpga ZLIB::ZLIB)
target_compile_definitions(libarchfpga PRIVATE VTR_ENABLE_CAPNPROTO)
endif()
diff --git a/libs/libarchfpga/src/arch_check.cpp b/libs/libarchfpga/src/arch_check.cpp
index c8fb00299c4..5360d6e4c02 100644
--- a/libs/libarchfpga/src/arch_check.cpp
+++ b/libs/libarchfpga/src/arch_check.cpp
@@ -32,7 +32,7 @@ bool check_model_clocks(t_model* model, const char* file, uint32_t line) {
bool check_model_combinational_sinks(const t_model* model, const char* file, uint32_t line) {
//Outputs should have no combinational sinks
for (t_model_ports* port = model->outputs; port != nullptr; port = port->next) {
- if (port->combinational_sink_ports.size() != 0) {
+ if (!port->combinational_sink_ports.empty()) {
archfpga_throw(file, line,
"Model '%s' output port '%s' can not have combinational sink ports",
model->name, port->name);
@@ -114,9 +114,9 @@ void check_port_direct_mappings(t_physical_tile_type_ptr physical_tile, t_sub_ti
}
for (auto pin_map : pin_direct_map) {
- auto block_port = get_port_by_pin(logical_block, pin_map.first.pin);
+ const t_port* block_port = logical_block->get_port_by_pin(pin_map.first.pin);
- auto sub_tile_port = get_port_by_pin(sub_tile, pin_map.second.pin);
+ const t_physical_tile_port* sub_tile_port = sub_tile->get_port_by_pin(pin_map.second.pin);
VTR_ASSERT(block_port != nullptr);
VTR_ASSERT(sub_tile_port != nullptr);
diff --git a/libs/libarchfpga/src/arch_util.cpp b/libs/libarchfpga/src/arch_util.cpp
index 80b21308e9f..f991ebf30ad 100644
--- a/libs/libarchfpga/src/arch_util.cpp
+++ b/libs/libarchfpga/src/arch_util.cpp
@@ -851,6 +851,7 @@ void ProcessMemoryClass(t_pb_type* mem_pb_type) {
mem_pb_type->model = nullptr;
mem_pb_type->modes[0].num_interconnect = mem_pb_type->num_ports * num_pb;
+ VTR_ASSERT(mem_pb_type->modes[0].num_interconnect > 0);
mem_pb_type->modes[0].interconnect = new t_interconnect[mem_pb_type->modes[0].num_interconnect];
for (i = 0; i < mem_pb_type->modes[0].num_interconnect; i++) {
diff --git a/libs/libarchfpga/src/arch_util.h b/libs/libarchfpga/src/arch_util.h
index c39cf77b94f..fb251bffe10 100644
--- a/libs/libarchfpga/src/arch_util.h
+++ b/libs/libarchfpga/src/arch_util.h
@@ -23,8 +23,8 @@ class InstPort {
InstPort() = default;
InstPort(const std::string& str);
- std::string instance_name() const { return instance_.name; }
- std::string port_name() const { return port_.name; }
+ const std::string& instance_name() const { return instance_.name; }
+ const std::string& port_name() const { return port_.name; }
int instance_low_index() const { return instance_.low_idx; }
int instance_high_index() const { return instance_.high_idx; }
@@ -40,7 +40,7 @@ class InstPort {
private:
struct name_index {
- std::string name = "";
+ std::string name;
int low_idx = UNSPECIFIED;
int high_idx = UNSPECIFIED;
};
diff --git a/libs/libarchfpga/src/cad_types.h b/libs/libarchfpga/src/cad_types.h
index 5ab46816b99..af0aac9a994 100644
--- a/libs/libarchfpga/src/cad_types.h
+++ b/libs/libarchfpga/src/cad_types.h
@@ -104,7 +104,7 @@ struct t_pack_patterns {
std::vector> chain_root_pins;
// default constructor initializing to an invalid pack pattern
- t_pack_patterns() {
+ t_pack_patterns() noexcept {
name = nullptr;
index = -1;
root_block = nullptr;
@@ -126,7 +126,7 @@ struct t_cluster_placement_primitive {
t_pb_graph_node* pb_graph_node;
bool valid;
float base_cost; /* cost independent of current status of packing */
- float incremental_cost; /* cost dependant on current status of packing */
+ float incremental_cost; /* cost dependent on current status of packing */
};
#endif
diff --git a/libs/libarchfpga/src/device_grid.h b/libs/libarchfpga/src/device_grid.h
index 8e1332559ed..c04e3f8dc4e 100644
--- a/libs/libarchfpga/src/device_grid.h
+++ b/libs/libarchfpga/src/device_grid.h
@@ -19,8 +19,11 @@ struct t_grid_tile {
const t_metadata_dict* meta = nullptr;
};
-///@brief DeviceGrid represents the FPGA fabric. It is used to get information about different layers and tiles.
-// TODO: All of the function that use helper functions of this class should pass the layer_num to the functions, and the default value of layer_num should be deleted eventually.
+//TODO: All of the functions that use helper functions of this class should pass the layer_num to the functions, and the default value of layer_num should be deleted eventually.
+/**
+ * @class DeviceGrid
+ * @brief Represents the FPGA fabric. It is used to get information about different layers and tiles.
+ */
class DeviceGrid {
public:
DeviceGrid() = default;
@@ -77,6 +80,10 @@ class DeviceGrid {
inline int get_height_offset(const t_physical_tile_loc& tile_loc) const {
return grid_[tile_loc.layer_num][tile_loc.x][tile_loc.y].height_offset;
}
+ ///@brief Returns true if the given location is the root location (bottom left corner) of a tile, i.e. both its width offset and its height offset are zero.
+ inline bool is_root_location(const t_physical_tile_loc& tile_loc) const {
+ return get_width_offset(tile_loc) == 0 && get_height_offset(tile_loc) == 0;
+ }
///@brief Returns a rectangle which represents the bounding box of the tile at the given location.
inline vtr::Rect get_tile_bb(const t_physical_tile_loc& tile_loc) const {
diff --git a/libs/libarchfpga/src/echo_arch.cpp b/libs/libarchfpga/src/echo_arch.cpp
index 4fb52c415f2..edceeb748fb 100644
--- a/libs/libarchfpga/src/echo_arch.cpp
+++ b/libs/libarchfpga/src/echo_arch.cpp
@@ -297,7 +297,7 @@ void PrintArchInfo(FILE* Echo, const t_arch* arch) {
int num_layers = (int)layout.layers.size();
if (num_layers > 1) {
fprintf(Echo, "\t\t\t\ttype unidir mux_name for between two dice connections: %s\n",
- arch->switches[seg.arch_opin_between_dice_switch].name.c_str());
+ arch->switches[seg.arch_inter_die_switch].name.c_str());
}
}
} else { //Should be bidir
diff --git a/libs/libarchfpga/src/parse_switchblocks.cpp b/libs/libarchfpga/src/parse_switchblocks.cpp
index 1e8908713bf..3ef9ec8e155 100644
--- a/libs/libarchfpga/src/parse_switchblocks.cpp
+++ b/libs/libarchfpga/src/parse_switchblocks.cpp
@@ -4,19 +4,15 @@
*
*
* A large chunk of this file is dedicated to helping parse the initial switchblock
- * specificaiton in the XML arch file, providing error checking, etc.
+ * specification in the XML arch file, providing error checking, etc.
*
* Another large chunk of this file is dedicated to parsing the actual formulas
* specified by the switch block permutation functions into their numeric counterparts.
*/
-#include
+#include
#include
-#include
#include
-#include
-#include
-#include
#include "vtr_assert.h"
#include "vtr_util.h"
@@ -26,9 +22,7 @@
#include "arch_error.h"
-#include "read_xml_util.h"
#include "arch_util.h"
-#include "arch_types.h"
#include "physical_types.h"
#include "parse_switchblocks.h"
diff --git a/libs/libarchfpga/src/physical_types.cpp b/libs/libarchfpga/src/physical_types.cpp
index 3bdabaee2a7..9b72cb95758 100644
--- a/libs/libarchfpga/src/physical_types.cpp
+++ b/libs/libarchfpga/src/physical_types.cpp
@@ -71,6 +71,42 @@ bool t_rr_switch_inf::configurable() const {
return switch_type_is_configurable(type());
}
+bool t_rr_switch_inf::operator==(const t_rr_switch_inf& other) const { // True only if every field compared below, and type(), match; Hasher::operator() hashes this same set of fields.
+ return R == other.R
+ && Cin == other.Cin
+ && Cout == other.Cout
+ && Cinternal == other.Cinternal
+ && Tdel == other.Tdel
+ && mux_trans_size == other.mux_trans_size
+ && buf_size == other.buf_size
+ && power_buffer_type == other.power_buffer_type
+ && power_buffer_size == other.power_buffer_size
+ && intra_tile == other.intra_tile
+ && type() == other.type();
+}
+
+std::size_t t_rr_switch_inf::Hasher::operator()(const t_rr_switch_inf& s) const { // Hashes the same fields that operator== compares.
+    std::size_t hash_val = 0;
+
+    auto hash_combine = [&hash_val](auto&& val) { // Folds std::hash of val into hash_val (Boost-style hash_combine mixing).
+        hash_val ^= std::hash<std::decay_t<decltype(val)>>{}(val) + 0x9e3779b9 + (hash_val << 6) + (hash_val >> 2);
+    };
+
+    hash_combine(s.R);
+    hash_combine(s.Cin);
+    hash_combine(s.Cout);
+    hash_combine(s.Cinternal);
+    hash_combine(s.Tdel);
+    hash_combine(s.mux_trans_size);
+    hash_combine(s.buf_size);
+    hash_combine(static_cast<int>(s.power_buffer_type)); // Converted to int so std::hash<int> is used.
+    hash_combine(s.power_buffer_size);
+    hash_combine(s.intra_tile);
+    hash_combine(static_cast<int>(s.type())); // Converted to int so std::hash<int> is used.
+
+    return hash_val;
+}
+
void t_rr_switch_inf::set_type(SwitchType type_val) {
type_ = type_val;
}
@@ -136,6 +172,56 @@ bool t_physical_tile_type::is_empty() const {
return name == std::string(EMPTY_BLOCK_NAME);
}
+int t_physical_tile_type::find_pin(std::string_view port_name, int pin_index_in_port) const { // Searches every sub tile for a port named port_name; yields OPEN if none matches.
+ int ipin = OPEN;
+ int port_base_ipin = 0; // Sum of num_pins of the ports that precede the match inside its sub tile.
+ int num_port_pins = OPEN; // Stays OPEN until a matching port is found.
+ int pin_offset = 0; // Sum of num_phy_pins of the sub tiles scanned before the match.
+
+ bool port_found = false;
+ for (const t_sub_tile& sub_tile : sub_tiles) {
+ for (const t_physical_tile_port& port : sub_tile.ports) {
+ if (port_name == port.name) {
+ port_found = true;
+ num_port_pins = port.num_pins;
+ break;
+ }
+
+ port_base_ipin += port.num_pins;
+ }
+
+ if (port_found) {
+ break;
+ }
+
+ port_base_ipin = 0; // No match in this sub tile: restart the per-sub-tile port offset.
+ pin_offset += sub_tile.num_phy_pins;
+ }
+
+ if (num_port_pins != OPEN) {
+ VTR_ASSERT(pin_index_in_port < num_port_pins); // Only the upper bound is checked here.
+
+ ipin = port_base_ipin + pin_index_in_port + pin_offset;
+ }
+
+ return ipin;
+}
+
+int t_physical_tile_type::find_pin_class(std::string_view port_name, int pin_index_in_port, e_pin_type pin_type) const { // Yields OPEN when find_pin() does, or when the pin's pin_class entry is OPEN.
+ int iclass = OPEN;
+
+ int ipin = find_pin(port_name, pin_index_in_port);
+
+ if (ipin != OPEN) {
+ iclass = pin_class[ipin];
+
+ if (iclass != OPEN) {
+ VTR_ASSERT(class_inf[iclass].type == pin_type); // pin_type is only asserted against the class found; it does not steer the lookup.
+ }
+ }
+ return iclass;
+}
+
/*
* t_logical_block_type
*/
@@ -144,6 +230,28 @@ bool t_logical_block_type::is_empty() const {
return name == std::string(EMPTY_BLOCK_NAME);
}
+const t_port* t_logical_block_type::get_port(std::string_view port_name) const { // Linear search by name over pb_type->ports; nullptr when no port matches.
+    for (int i = 0; i < pb_type->num_ports; i++) {
+        const t_port& port = pb_type->ports[i]; // Bound by reference: the port is only inspected, so copying it is unnecessary.
+        if (port_name == port.name) {
+            return &pb_type->ports[port.index];
+        }
+    }
+
+    return nullptr;
+}
+
+const t_port* t_logical_block_type::get_port_by_pin(int pin) const { // nullptr when pin falls inside no port's pin range.
+ for (int i = 0; i < pb_type->num_ports; i++) {
+ const t_port& port = pb_type->ports[i];
+ if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) { // Half-open range [first, first + num_pins).
+ return &pb_type->ports[port.index];
+ }
+ }
+
+ return nullptr;
+}
+
/**
* t_pb_graph_node
*/
@@ -220,7 +328,7 @@ std::string t_pb_graph_pin::to_string(const bool full_description) const {
return pin_string;
}
-/**
+/*
* t_pb_graph_edge
*/
@@ -253,3 +361,39 @@ bool t_pb_graph_edge::belongs_to_pattern(int pattern_index) const {
// return false otherwise
return false;
}
+
+/*
+ * t_sub_tile
+ */
+
+int t_sub_tile::total_num_internal_pins() const { // Sum of pin_logical_num_to_pb_pin_mapping sizes over all equivalent sites, multiplied by capacity.total().
+ int num_pins = 0;
+
+ for (t_logical_block_type_ptr eq_site : equivalent_sites) {
+ num_pins += (int)eq_site->pin_logical_num_to_pb_pin_mapping.size();
+ }
+
+ num_pins *= capacity.total(); // Scale the per-instance count by the number of instances in this sub tile's capacity range.
+
+ return num_pins;
+}
+
+const t_physical_tile_port* t_sub_tile::get_port(std::string_view port_name) { // nullptr when no port matches. NOTE(review): not const-qualified, unlike get_port_by_pin(); confirm whether that is intended.
+ for (const t_physical_tile_port& port : ports) {
+ if (port_name == port.name) {
+ return &ports[port.index];
+ }
+ }
+
+ return nullptr;
+}
+
+const t_physical_tile_port* t_sub_tile::get_port_by_pin(int pin) const { // nullptr when pin falls inside no port's pin range.
+ for (const t_physical_tile_port& port : ports) {
+ if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) { // Half-open range [first, first + num_pins).
+ return &ports[port.index];
+ }
+ }
+
+ return nullptr;
+}
diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h
index a0351076127..19a625db601 100644
--- a/libs/libarchfpga/src/physical_types.h
+++ b/libs/libarchfpga/src/physical_types.h
@@ -24,8 +24,7 @@
* Authors: Jason Luu and Kenneth Kent
*/
-#ifndef PHYSICAL_TYPES_H
-#define PHYSICAL_TYPES_H
+#pragma once
#include
#include
@@ -97,7 +96,7 @@ enum class e_sb_type;
// Metadata value storage.
class t_metadata_value {
public:
- explicit t_metadata_value(vtr::interned_string v)
+ explicit t_metadata_value(vtr::interned_string v) noexcept
: value_(v) {}
explicit t_metadata_value(const t_metadata_value& o) noexcept
: value_(o.value_) {}
@@ -546,10 +545,13 @@ struct t_port_power {
bool reverse_scaled; /* Scale by (1-prob) */
};
-//The type of Fc specification
+/**
+ * @enum e_fc_type
+ * @brief The type of Fc specification
+ */
enum class e_fc_type {
- IN, //The fc specification for an input pin
- OUT //The fc specification for an output pin
+ IN, /**< Fc specification for an input pin. */
+ OUT /**< Fc specification for an output pin. */
};
//The value type of the Fc specification
@@ -709,12 +711,6 @@ struct t_physical_tile_type {
* tile_block_pin_directs_map[logical block index][logical block pin] -> physical tile pin */
std::unordered_map>> tile_block_pin_directs_map;
- /* Returns the indices of pins that contain a clock for this physical logic block */
- std::vector get_clock_pins_indices() const;
-
- // Returns the sub tile location of the physical tile given an input pin
- int get_sub_tile_loc_from_pin(int pin_num) const;
-
// TODO: Remove is_input_type / is_output_type as part of
// https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/1193
@@ -724,8 +720,21 @@ struct t_physical_tile_type {
// Does this t_physical_tile_type contain an outpad?
bool is_output_type = false;
- // Is this t_physical_tile_type an empty type?
+ public: // Function members
+ ///@brief Returns the indices of pins that contain a clock for this physical logic block
+ std::vector get_clock_pins_indices() const;
+
+ ///@brief Returns the sub tile location of the physical tile given an input pin
+ int get_sub_tile_loc_from_pin(int pin_num) const;
+
+ ///@brief Is this t_physical_tile_type an empty type?
bool is_empty() const;
+
+ ///@brief Returns the pin index of pin pin_index_in_port of the port named port_name (offset by the num_phy_pins of the preceding sub tiles), or OPEN if no such port exists
+ int find_pin(std::string_view port_name, int pin_index_in_port) const;
+
+ ///@brief Returns the pin class associated with the specified pin_index_in_port within the port port_name on this tile type, or OPEN if the pin is not found
+ int find_pin_class(std::string_view port_name, int pin_index_in_port, e_pin_type pin_type) const;
};
/* Holds the capacity range of a certain sub_tile block within the parent physical tile type.
@@ -801,6 +810,19 @@ struct t_sub_tile {
int num_phy_pins = 0;
int index = -1;
+
+ public:
+ int total_num_internal_pins() const;
+
+ /**
+ * @brief Returns the port of this sub tile that has the given name, or nullptr if there is none
+ */
+ const t_physical_tile_port* get_port(std::string_view port_name);
+
+ /**
+ * @brief Returns the port of this sub tile whose pin range contains the given pin index, or nullptr if there is none
+ */
+ const t_physical_tile_port* get_port_by_pin(int pin) const;
};
/** A logical pin defines the pin index of a logical block type (i.e. a top level PB type)
@@ -955,6 +977,17 @@ struct t_logical_block_type {
// Is this t_logical_block_type empty?
bool is_empty() const;
+
+ public:
+ /**
+ * @brief Returns the port of this logical block type that has the given name, or nullptr if there is none
+ */
+ const t_port* get_port(std::string_view port_name) const;
+
+ /**
+ * @brief Returns the port of this logical block type whose pin range contains the given pin index, or nullptr if there is none
+ */
+ const t_port* get_port_by_pin(int pin) const;
};
/*************************************************************************************************
@@ -1253,8 +1286,8 @@ struct t_pin_to_pin_annotation {
* flat_site_index : Index of this primitive site within its primitive type within this cluster type.
* Values are in [0...total_primitive_count-1], e.g. if there are 10 ALMs per cluster, 2 FFS
* and 2 LUTs per ALM, then flat site indices for FFs would run from 0 to 19, and flat site
- indices for LUTs would run from 0 to 19. This member is only used by nodes corresponding
- to primitive sites. It is used when reconstructing clusters from a flat placement file.
+ * indices for LUTs would run from 0 to 19. This member is only used by nodes corresponding
+ * to primitive sites. It is used when reconstructing clusters from a flat placement file.
* illegal_modes : vector containing illegal modes that result in conflicts during routing
*/
class t_pb_graph_node {
@@ -1308,7 +1341,7 @@ class t_pb_graph_node {
int total_pb_pins; /* only valid for top-level */
- void* temp_scratch_pad; /* temporary data, useful for keeping track of things when traversing data structure */
+ void* temp_scratch_pad; /* temporary data, useful for keeping track of things when traversing data structure */
int* input_pin_class_size; /* Stores the number of pins that belong to a particular input pin class */
int num_input_pin_class; /* number of input pin classes that this pb_graph_node has */
@@ -1318,7 +1351,6 @@ class t_pb_graph_node {
int total_primitive_count; /* total number of this primitive type in the cluster */
int flat_site_index; /* index of this primitive within sites of its type in this cluster */
-
/* Interconnect instances for this pb
* Only used for power
*/
@@ -1538,6 +1570,7 @@ enum e_directionality {
UNI_DIRECTIONAL,
BI_DIRECTIONAL
};
+
/* X_AXIS: Data that describes an x-directed wire segment (CHANX) *
* Y_AXIS: Data that describes an y-directed wire segment (CHANY) *
* BOTH_AXIS: Data that can be applied to both x-directed and y-directed wire segment */
@@ -1560,115 +1593,186 @@ enum class SegResType {
NUM_RES_TYPES
};
-constexpr std::array(SegResType::NUM_RES_TYPES)> RES_TYPE_STRING = {{"GCLK", "GENERAL"}}; //String versions of segment resource types
+/// String versions of segment resource types, indexed by the integer value of SegResType
+constexpr std::array<const char*, static_cast<size_t>(SegResType::NUM_RES_TYPES)> RES_TYPE_STRING{"GCLK", "GENERAL"};
+/// Defines the type of switch block used in FPGA routing.
enum e_switch_block_type {
+ /// SUBSET: a Xilinx-like switch block where track i in one channel always
+ /// connects to track i in other channels.
SUBSET,
+
+ /// WILTON: a switch block where track i
+ /// does not always connect to track i in other channels.
+ /// See Steve Wilton, PhD Thesis, University of Toronto, 1996.
WILTON,
+
+ /// The UNIVERSAL switch block is from Y. W. Chang et al, TODAES, Jan. 1996, pp. 80 - 101.
UNIVERSAL,
+
+ /// The FULL switch block type allows for complete connectivity between tracks.
FULL,
+
+ /// A CUSTOM switch block has also been added which allows a user to describe custom permutation functions and connection patterns.
+ /// See comment at top of SRC/route/build_switchblocks.c
CUSTOM
};
-typedef enum e_switch_block_type t_switch_block_type;
+
enum e_Fc_type {
ABSOLUTE,
FRACTIONAL
};
-/* Lists all the important information about a certain segment type. Only *
- * used if the route_type is DETAILED. [0 .. det_routing_arch.num_segment] *
- * name: the name of this segment *
- * frequency: ratio of tracks which are of this segment type. *
- * length: Length (in clbs) of the segment. *
- * arch_wire_switch: Index of the switch type that connects other wires *
- * *to* this segment. Note that this index is in relation *
- * to the switches from the architecture file, not the *
- * expanded list of switches that is built at the end of *
- * build_rr_graph. *
- * arch_opin_switch: Index of the switch type that connects output pins *
- * (OPINs) *to* this segment. Note that this index is in *
- * relation to the switches from the architecture file, *
- * not the expanded list of switches that is built *
- * at the end of build_rr_graph *
- * @param arch_wire_switch_dec: Same as arch_wire_switch but used only for *
- * decremental tracks if it is specified in the *
- * architecture file. If -1, this value was not set in *
- * the architecture file and arch_wire_switch should be *
- * used for "DEC_DIR" wire segments. *
- * @param arch_opin_switch_dec: Same as arch_opin_switch but used only for *
- * decremental tracks if it is specified in the *
- * architecture file. If -1, this value was not set in *
- * the architecture file and arch_opin_switch should be *
- * used for "DEC_DIR" wire segments. *
- * @param arch_opin_between_dice_switch: Index of the switch type that *
- * connects output pins (OPINs) *to* this segment from *
- * *another die (layer)*. Note that this index is in *
- * relation to the switches from the architecture file, *
- * not the expanded list of switches that is built at *
- * the end of build_rr_graph *
- * *
- * frac_cb: The fraction of logic blocks along its length to which this *
- * segment can connect. (i.e. internal population). *
- * frac_sb: The fraction of the length + 1 switch blocks along the segment *
- * to which the segment can connect. Segments that aren't long *
- * lines must connect to at least two switch boxes. *
- * parallel_axis: Defines what axis the segment is parallel to. See *
- * e_parallel_axis comments for more details on the values. *
- * Cmetal: Capacitance of a routing track, per unit logic block length. *
- * Rmetal: Resistance of a routing track, per unit logic block length. *
- * (UDSD by AY) drivers: How do signals driving a routing track connect to *
- * the track? *
- * seg_index: The index of the segment as stored in the appropriate Segs list*
- * Upon loading the architecture, we use this field to keep track *
- * the segment's index in the unified segment_inf vector. This is *
- * useful when building the rr_graph for different Y & X channels *
- * in terms of track distribution and segment type. *
- * res_type: Determines the routing network to which the segment belongs. *
- * Possible values are: *
- * - GENERAL: The segment is part of the general routing *
- * resources. *
- * - GCLK: The segment is part of the global routing network. *
- * For backward compatibility, this attribute is optional. If not *
- * specified, the resource type for the segment is considered to *
- * be GENERAL. *
- * meta: Table storing extra arbitrary metadata attributes. *
- *
- *
- * New added parameters for bend wires: *
- * isbend: This segment is bend or not *
- * bend: The bend type of the segment, "-"-0, "U"-1, "D"-2 *
- * For example: bend pattern <- - U ->; corresponding bend: [0,0,1,0] *
- * part_len: Divide the segment into several parts based on bend position. *
- * For example: length-5 bend segment: <- - U ->; *
- * Corresponding part_len: [3,2] */
+/**
+ * @brief Lists all the important information about a certain segment type. Only
+ * used if the route_type is DETAILED. [0 .. det_routing_arch.num_segment]
+ */
struct t_segment_inf {
+ /**
+ * @brief The name of the segment type
+ */
std::string name;
+
+ /**
+ * @brief ratio of tracks which are of this segment type.
+ */
int frequency;
+
+ /**
+ * @brief Length (in clbs) of the segment.
+ */
int length;
+
+ /**
+ * @brief Index of the switch type that connects other wires to this segment.
+ * Note that this index is in relation to the switches from the architecture file,
+ * not the expanded list of switches that is built at the end of build_rr_graph.
+ */
short arch_wire_switch;
+
+ /**
+ * @brief Index of the switch type that connects output pins to this segment.
+ * Note that this index is in relation to the switches from the architecture file,
+ * not the expanded list of switches that is built at the end of build_rr_graph.
+ */
short arch_opin_switch;
+
+ /**
+ * @brief Same as arch_wire_switch but used only for decremental tracks if it is
+ * specified in the architecture file. If -1, this value was not set in the
+ * architecture file and arch_wire_switch should be used for "DEC_DIR" wire segments.
+ */
short arch_wire_switch_dec = -1;
+
+ /**
+ * @brief Same as arch_opin_switch but used only for decremental tracks if
+ * it is specified in the architecture file. If -1, this value was not set in
+ * the architecture file and arch_opin_switch should be used for "DEC_DIR" wire segments.
+ */
short arch_opin_switch_dec = -1;
- short arch_opin_between_dice_switch = -1;
+
+ /**
+ * @brief Index of the switch type that connects output pins (OPINs) to this
+ * segment from another die (layer). Note that this index is in relation to
+ * the switches from the architecture file, not the expanded list of switches
+ * that is built at the end of build_rr_graph.
+ */
+ short arch_inter_die_switch = -1;
+
+ /**
+ * @brief The fraction of logic blocks along its length to which this segment can connect.
+ * (i.e. internal population).
+ */
float frac_cb;
+
+ /**
+ * @brief The fraction of the length + 1 switch blocks along the segment to which the segment can connect.
+ * Segments that aren't long lines must connect to at least two switch boxes.
+ */
float frac_sb;
+
bool longline;
+
+ /**
+ * @brief The resistance of a routing track, per unit logic block length. */
float Rmetal;
+
+ /**
+ * @brief The capacitance of a routing track, per unit logic block length. */
float Cmetal;
+
enum e_directionality directionality;
+
+ /**
+ * @brief Defines what axis the segment is parallel to. See e_parallel_axis
+ * comments for more details on the values.
+ */
enum e_parallel_axis parallel_axis;
+
+ /**
+ * @brief A vector of booleans indicating whether the segment can connect to a logic block.
+ */
std::vector cb;
+
+ /**
+ * @brief A vector of booleans indicating whether the segment can connect to a switch block.
+ */
std::vector sb;
- bool isbend;
+
+ /**
+ * @brief Whether this segment is a bend segment.
+ */
+ bool isbend;
+
+ /**
+ * @brief The bend type of the segment, "-"-0, "U"-1, "D"-2
+ * For example: bend pattern <- - U ->; corresponding bend: [0,0,1,0]
+ */
std::vector bend;
+
+ /**
+ * @brief Divide the segment into several parts based on bend position.
+ * For example: length-5 bend segment: <- - U ->;
+ * Corresponding part_len: [3,2]
+ */
std::vector part_len;
+
+ /**
+ * @brief The index of the segment as stored in the appropriate Segs list.
+ * Upon loading the architecture, we use this field to keep track of the
+ * segment's index in the unified segment_inf vector. This is useful when
+ * building the rr_graph for different Y & X channels in terms of track
+ * distribution and segment type.
+ */
int seg_index;
+
+ /**
+ * @brief Determines the routing network to which the segment belongs.
+ * Possible values are:
+ * - GENERAL: The segment is part of the general routing resources.
+ * - GCLK: The segment is part of the global routing network.
+ * For backward compatibility, this attribute is optional. If not specified,
+ * the resource type for the segment is considered to be GENERAL.
+ */
enum SegResType res_type = SegResType::GENERAL;
- //float Cmetal_per_m; /* Wire capacitance (per meter) */
};
inline bool operator==(const t_segment_inf& a, const t_segment_inf& b) {
- return a.name == b.name && a.frequency == b.frequency && a.length == b.length && a.arch_wire_switch == b.arch_wire_switch && a.arch_opin_switch == b.arch_opin_switch && a.arch_opin_between_dice_switch == b.arch_opin_between_dice_switch && a.frac_cb == b.frac_cb && a.frac_sb == b.frac_sb && a.longline == b.longline && a.Rmetal == b.Rmetal && a.Cmetal == b.Cmetal && a.directionality == b.directionality && a.parallel_axis == b.parallel_axis && a.cb == b.cb && a.sb == b.sb;
+ return a.name == b.name
+ && a.frequency == b.frequency
+ && a.length == b.length
+ && a.arch_wire_switch == b.arch_wire_switch
+ && a.arch_opin_switch == b.arch_opin_switch
+ && a.arch_inter_die_switch == b.arch_inter_die_switch
+ && a.frac_cb == b.frac_cb
+ && a.frac_sb == b.frac_sb
+ && a.longline == b.longline
+ && a.Rmetal == b.Rmetal
+ && a.Cmetal == b.Cmetal
+ && a.directionality == b.directionality
+ && a.parallel_axis == b.parallel_axis
+ && a.cb == b.cb
+ && a.sb == b.sb;
}
/*provide hashing for t_segment_inf to enable the use of many std containers.
@@ -1840,16 +1944,28 @@ struct t_rr_switch_inf {
bool intra_tile = false;
public:
- //Returns the type of switch
+ /// Returns the type of switch
SwitchType type() const;
- //Returns true if this switch type isolates its input and output into
- //separate DC-connected subcircuits
+ /// Returns true if this switch type isolates its input and output into
+ /// separate DC-connected subcircuits
bool buffered() const;
- //Returns true if this switch type is configurable
+ /// Returns true if this switch type is configurable
bool configurable() const;
+ bool operator==(const t_rr_switch_inf& other) const;
+
+ /**
+ * @brief Functor for computing a hash value for t_rr_switch_inf.
+ *
+ * This custom hasher enables the use of t_rr_switch_inf objects as keys
+ * in unordered containers such as std::unordered_map or std::unordered_set.
+ */
+ struct Hasher {
+ std::size_t operator()(const t_rr_switch_inf& s) const;
+ };
+
public:
void set_type(SwitchType type_val);
@@ -1973,7 +2089,7 @@ struct t_switchblock_inf {
/* We can also define a region to apply this SB to all locations falls into this region using regular expression in the architecture file*/
t_sb_loc_spec reg_x;
t_sb_loc_spec reg_y;
-
+
t_permutation_map permutation_map; /* map holding the permutation functions attributed to this switchblock */
std::vector wireconns; /* list of wire types/groups this SB will connect */
@@ -2062,8 +2178,6 @@ struct t_noc_inf {
std::string noc_router_tile_name;
};
-
-
/* Detailed routing architecture */
struct t_arch {
/** Stores unique strings used as key and values in tags,
@@ -2143,11 +2257,11 @@ struct t_arch {
std::vector ipin_cblock_switch_name;
std::vector grid_layouts; //Set of potential device layouts
-
+
//the layout that is chosen to be used with command line options
//It is used to generate custom SB for a specific locations within the device
//If the layout is not specified in the command line options, this variable will be set to "auto"
- std::string device_layout;
+ std::string device_layout;
std::vector vib_grid_layouts;
@@ -2160,5 +2274,3 @@ struct t_arch {
//bool is_vib_arch = false;
std::vector vib_infs;
};
-
-#endif
diff --git a/libs/libarchfpga/src/physical_types_util.cpp b/libs/libarchfpga/src/physical_types_util.cpp
index 2256f81d66c..2ecc7fbd41c 100644
--- a/libs/libarchfpga/src/physical_types_util.cpp
+++ b/libs/libarchfpga/src/physical_types_util.cpp
@@ -154,7 +154,7 @@ static std::tuple get_pin_index_for_inst(t_physical_til
pin_inst_num = (pin_physical_num - pin_offset) % pins_per_inst;
} else {
int pin_offset = get_sub_tile_inst_physical_pin_num_offset(type, sub_tile, sub_tile_cap);
- int pins_per_inst = get_total_num_sub_tile_internal_pins(sub_tile) / sub_tile->capacity.total();
+ int pins_per_inst = sub_tile->total_num_internal_pins() / sub_tile->capacity.total();
pin_inst_num = (pin_physical_num - pin_offset) % pins_per_inst;
}
@@ -225,7 +225,7 @@ static int get_sub_tile_physical_pin_num_offset(t_physical_tile_type_ptr physica
if (&tmp_sub_tile == curr_sub_tile)
break;
else
- offset += get_total_num_sub_tile_internal_pins(&tmp_sub_tile);
+ offset += tmp_sub_tile.total_num_internal_pins();
}
return offset;
@@ -235,7 +235,7 @@ static int get_sub_tile_inst_physical_pin_num_offset(t_physical_tile_type_ptr ph
const t_sub_tile* curr_sub_tile,
const int curr_relative_cap) {
int offset = get_sub_tile_physical_pin_num_offset(physical_tile, curr_sub_tile);
- int sub_tile_inst_num_pins = get_total_num_sub_tile_internal_pins(curr_sub_tile) / curr_sub_tile->capacity.total();
+ int sub_tile_inst_num_pins = curr_sub_tile->total_num_internal_pins() / curr_sub_tile->capacity.total();
offset += (curr_relative_cap * sub_tile_inst_num_pins);
@@ -563,57 +563,6 @@ int get_max_num_pins(t_logical_block_type_ptr logical_block) {
return max_num_pins;
}
-//Returns the pin class associated with the specified pin_index_in_port within the port port_name on type
-int find_pin_class(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port, e_pin_type pin_type) {
- int iclass = OPEN;
-
- int ipin = find_pin(type, port_name, pin_index_in_port);
-
- if (ipin != OPEN) {
- iclass = type->pin_class[ipin];
-
- if (iclass != OPEN) {
- VTR_ASSERT(type->class_inf[iclass].type == pin_type);
- }
- }
- return iclass;
-}
-
-int find_pin(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port) {
- int ipin = OPEN;
- int port_base_ipin = 0;
- int num_pins = OPEN;
- int pin_offset = 0;
-
- bool port_found = false;
- for (const auto& sub_tile : type->sub_tiles) {
- for (const auto& port : sub_tile.ports) {
- if (0 == strcmp(port.name, port_name.c_str())) {
- port_found = true;
- num_pins = port.num_pins;
- break;
- }
-
- port_base_ipin += port.num_pins;
- }
-
- if (port_found) {
- break;
- }
-
- port_base_ipin = 0;
- pin_offset += sub_tile.num_phy_pins;
- }
-
- if (num_pins != OPEN) {
- VTR_ASSERT(pin_index_in_port < num_pins);
-
- ipin = port_base_ipin + pin_index_in_port + pin_offset;
- }
-
- return ipin;
-}
-
std::pair get_capacity_location_from_physical_pin(t_physical_tile_type_ptr physical_tile, int pin) {
int pins_to_remove = 0;
for (const auto& sub_tile : physical_tile->sub_tiles) {
@@ -638,7 +587,7 @@ std::pair get_capacity_location_from_physical_pin(t_physical_tile_type
int get_physical_pin_from_capacity_location(t_physical_tile_type_ptr physical_tile, int relative_pin, int capacity_location) {
int pins_to_add = 0;
- for (auto sub_tile : physical_tile->sub_tiles) {
+ for (const t_sub_tile& sub_tile : physical_tile->sub_tiles) {
auto capacity = sub_tile.capacity;
int rel_capacity = capacity_location - capacity.low;
int num_inst_pins = sub_tile.num_phy_pins / capacity.total();
@@ -841,52 +790,6 @@ std::vector block_type_class_index_to_pin_names(t_physical_tile_typ
return pin_names;
}
-const t_physical_tile_port* get_port_by_name(t_sub_tile* sub_tile, const char* port_name) {
- for (auto port : sub_tile->ports) {
- if (0 == strcmp(port.name, port_name)) {
- return &sub_tile->ports[port.index];
- }
- }
-
- return nullptr;
-}
-
-const t_port* get_port_by_name(t_logical_block_type_ptr type, const char* port_name) {
- auto pb_type = type->pb_type;
-
- for (int i = 0; i < pb_type->num_ports; i++) {
- auto port = pb_type->ports[i];
- if (0 == strcmp(port.name, port_name)) {
- return &pb_type->ports[port.index];
- }
- }
-
- return nullptr;
-}
-
-const t_physical_tile_port* get_port_by_pin(const t_sub_tile* sub_tile, int pin) {
- for (auto port : sub_tile->ports) {
- if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) {
- return &sub_tile->ports[port.index];
- }
- }
-
- return nullptr;
-}
-
-const t_port* get_port_by_pin(t_logical_block_type_ptr type, int pin) {
- auto pb_type = type->pb_type;
-
- for (int i = 0; i < pb_type->num_ports; i++) {
- auto port = pb_type->ports[i];
- if (pin >= port.absolute_first_pin_index && pin < port.absolute_first_pin_index + port.num_pins) {
- return &pb_type->ports[port.index];
- }
- }
-
- return nullptr;
-}
-
/* Access information related to pin classes */
/** get information given class physical num **/
@@ -1009,7 +912,7 @@ std::tuple get_sub_tile_from_pin_physical_num(t_physical
int pin_offset = total_pin_counts;
for (auto& sub_tile : physical_tile->sub_tiles) {
- int sub_tile_num_pins = pin_on_tile ? sub_tile.num_phy_pins : get_total_num_sub_tile_internal_pins(&sub_tile);
+ int sub_tile_num_pins = pin_on_tile ? sub_tile.num_phy_pins : sub_tile.total_num_internal_pins();
total_pin_counts += sub_tile_num_pins;
if (physical_num < total_pin_counts) {
@@ -1347,15 +1250,6 @@ const t_pb_graph_node* get_pb_graph_node_from_pin_physical_num(t_physical_tile_t
return pb_graph_pin->parent_node;
}
-int get_total_num_sub_tile_internal_pins(const t_sub_tile* sub_tile) {
- int num_pins = 0;
- for (auto eq_site : sub_tile->equivalent_sites) {
- num_pins += (int)eq_site->pin_logical_num_to_pb_pin_mapping.size();
- }
- num_pins *= sub_tile->capacity.total();
- return num_pins;
-}
-
int get_tile_pin_max_ptc(t_physical_tile_type_ptr tile, bool is_flat) {
if (is_flat) {
return tile->num_pins + (int)tile->pin_num_to_pb_pin.size();
@@ -1538,4 +1432,3 @@ std::map get_sink_choking_points(t_physical_tile_type_ptr physical_til
return choking_point;
}
-/* */
diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h
index aa7b2617834..a081683faeb 100644
--- a/libs/libarchfpga/src/physical_types_util.h
+++ b/libs/libarchfpga/src/physical_types_util.h
@@ -1,5 +1,5 @@
-#ifndef PHYSICAL_TYPES_UTIL_H
-#define PHYSICAL_TYPES_UTIL_H
+
+#pragma once
#include "physical_types.h"
@@ -13,11 +13,11 @@
* functions in this file are the following: *
* - physical_tile_type: identifies a placeable tile within *
* the device grid. *
- * - logical_block_tpye: identifies a clustered block type *
+ * - logical_block_type: identifies a clustered block type *
* within the clb_netlist *
* *
* All the following utilities are intended to ease the *
- * developement to access the above mentioned classes and perform *
+ * development to access the above mentioned classes and perform *
* some required operations with their data. *
* *
* Please classify such functions in this file *
@@ -107,7 +107,7 @@
*
* For instance, the following information are required:
* - mapping between logical and sub tile pins.
- * - mapping between sub tile pins and absoulte physical pin
+ * - mapping between sub tile pins and absolute physical pin
* - capacity instance of the sub tile
*
* With all the above information we can calculate correctly the connection between the CLK (logical pin)
@@ -152,12 +152,12 @@ int get_physical_pin_from_capacity_location(t_physical_tile_type_ptr physical_ti
*
* Take the above CLOCK TILE example:
* - given the CLOCK TILE and the index corresponding to the CLK_1 pin, we want the relative pin
- * of one of its sub tiles at a particualr capacity location (i.e. sub tile instance).
+ * of one of its sub tiles at a particular capacity location (i.e. sub tile instance).
*
* std::tie(absolute_capacity, relative_pin) = get_capacity_location_from_physical_pin(clock_tile, 3)
*
* The value returned is (1, 0), where:
- * - 1 corresponds to the capacity location (sub tile instance) where the absoulte physical pin index (CLK_1) is connected
+ * - 1 corresponds to the capacity location (sub tile instance) where the absolute physical pin index (CLK_1) is connected
* - 0 corresponds to the relative pin index within the BUFGCTRL sub tile
*/
std::pair get_capacity_location_from_physical_pin(t_physical_tile_type_ptr physical_tile, int pin);
@@ -173,11 +173,6 @@ std::vector block_type_class_index_to_pin_names(t_physical_tile_typ
///@brief Returns the physical tile type matching a given physical tile type name, or nullptr (if not found)
t_physical_tile_type_ptr find_tile_type_by_name(const std::string& name, const std::vector& types);
-int find_pin_class(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port, e_pin_type pin_type);
-
-///@brief Returns the relative pin index within a sub tile that corresponds to the pin within the given port and its index in the port
-int find_pin(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port);
-
///@brief Returns the maximum number of pins within a logical block
int get_max_num_pins(t_logical_block_type_ptr logical_block);
@@ -217,7 +212,7 @@ int get_logical_block_physical_sub_tile_index(t_physical_tile_type_ptr physical_
t_logical_block_type_ptr logical_block);
/**
* @brief Returns the physical pin index (within 'physical_tile') corresponding to the
- * logical index ('pin' of the first instance of 'logical_block' within the physcial tile.
+ * logical index ('pin') of the first instance of 'logical_block' within the physical tile.
*
* This function is called before/during placement, when a sub tile index was not yet assigned.
*
@@ -228,7 +223,7 @@ int get_physical_pin(t_physical_tile_type_ptr physical_tile,
int pin);
/**
* @brief Returns the physical pin index (within 'physical_tile') corresponding to the
- * logical index ('pin' of the first instance of 'logical_block' within the physcial tile.
+ * logical index ('pin') of the first instance of 'logical_block' within the physical tile.
* This function considers if a given offset is in the range of sub tile capacity
*
* (First pin index at current sub-tile) (The wanted pin index)
@@ -286,26 +281,6 @@ int get_sub_tile_physical_pin(int sub_tile_index,
*/
t_physical_tile_port find_tile_port_by_name(t_physical_tile_type_ptr type, std::string_view port_name);
-/**
- * @brief Returns the physical tile port given the port name and the corresponding sub tile
- */
-const t_physical_tile_port* get_port_by_name(t_sub_tile* sub_tile, const char* port_name);
-
-/**
- * @brief Returns the logical block port given the port name and the corresponding logical block type
- */
-const t_port* get_port_by_name(t_logical_block_type_ptr type, const char* port_name);
-
-/**
- * @brief Returns the physical tile port given the pin name and the corresponding sub tile
- */
-const t_physical_tile_port* get_port_by_pin(const t_sub_tile* sub_tile, int pin);
-
-/**
- * @brief Returns the logical block port given the pin name and the corresponding logical block type
- */
-const t_port* get_port_by_pin(t_logical_block_type_ptr type, int pin);
-
/************************************ Access to intra-block resources ************************************/
/* Access information related to pin classes */
@@ -336,12 +311,6 @@ inline bool is_class_on_tile(t_physical_tile_type_ptr physical_tile, int class_p
/**
* @brief Classes are indexed in a way that the number of classes on the same pb_graph_node is continuous
- * @param physical_tile
- * @param sub_tile
- * @param logical_block
- * @param sub_tile_relative_cap
- * @param pb_graph_node
- * @return
*/
t_class_range get_pb_graph_node_class_physical_range(t_physical_tile_type_ptr physical_tile,
const t_sub_tile* sub_tile,
@@ -358,15 +327,11 @@ std::vector get_tile_root_classes(t_physical_tile_type_ptr physical_type);
/**
* Get the number of all classes, on the tile and inside the cluster.
- * @param physical_type
- * @return
*/
t_class_range get_flat_tile_primitive_classes(t_physical_tile_type_ptr physical_type);
/** **/
int get_tile_class_max_ptc(t_physical_tile_type_ptr tile, bool is_flat);
-/* */
-
/* Access information related to pins */
/** get information given pin physical number **/
@@ -434,8 +399,6 @@ int get_edge_sw_arch_idx(t_physical_tile_type_ptr physical_tile,
const t_pb_graph_node* get_pb_graph_node_from_pin_physical_num(t_physical_tile_type_ptr physical_type,
int pin_physical_num);
-int get_total_num_sub_tile_internal_pins(const t_sub_tile* sub_tile);
-
int get_tile_pin_max_ptc(t_physical_tile_type_ptr tile, bool is_flat);
int get_tile_num_internal_pin(t_physical_tile_type_ptr tile);
@@ -459,11 +422,6 @@ float get_pin_primitive_comb_delay(t_physical_tile_type_ptr physical_type,
/**
* @brief This function is used during reachability analysis to check whether two classes should be put in the same group
- * @param physical_tile
- * @param first_class_ptc_num
- * @param second_class_ptc_num
- * @param is_flat
- * @return
*/
bool classes_in_same_block(t_physical_tile_type_ptr physical_tile,
int first_class_ptc_num,
@@ -473,15 +431,8 @@ bool classes_in_same_block(t_physical_tile_type_ptr physical_tile,
/**
* @brief Given the sink group, identify the pins which can reach both sink_ptc_num and at least one of the sinks,
* in the grp.
- * @param physical_tile
- * @param sink_ptc_num
- * @param grp
* @return Key is the pin number and value is the number of sinks, including sink_ptc_num, in the grp reachable by the pin
*/
std::map get_sink_choking_points(t_physical_tile_type_ptr physical_tile,
int sink_ptc_num,
const std::vector& grp);
-
-/* */
-
-#endif
diff --git a/libs/libarchfpga/src/read_fpga_interchange_arch.cpp b/libs/libarchfpga/src/read_fpga_interchange_arch.cpp
index 8737503da3b..50840cbb948 100644
--- a/libs/libarchfpga/src/read_fpga_interchange_arch.cpp
+++ b/libs/libarchfpga/src/read_fpga_interchange_arch.cpp
@@ -5,28 +5,28 @@
#ifdef VTR_ENABLE_CAPNPROTO
-# include
-# include
-# include
-# include