verilog-to-routing
diff --git a/‎doc/src/vpr/command_line_usage.rst
Lines changed: 40 additions & 5 deletions b/‎doc/src/vpr/command_line_usage.rst
Lines changed: 40 additions & 5 deletions
diff --git a/‎doc/src/z_references.bib
Lines changed: 43 additions & 0 deletions b/‎doc/src/z_references.bib
Lines changed: 43 additions & 0 deletions
diff --git a/‎libs/EXTERNAL/libcatch2 b/‎libs/EXTERNAL/libcatch2
diff --git a/‎libs/libvtrutil/src/vtr_thread_pool.h
Lines changed: 159 additions & 0 deletions b/‎libs/libvtrutil/src/vtr_thread_pool.h
Lines changed: 159 additions & 0 deletions
diff --git a/‎vpr/src/analytical_place/analytical_placement_flow.cpp
Lines changed: 2 additions & 1 deletion b/‎vpr/src/analytical_place/analytical_placement_flow.cpp
Lines changed: 2 additions & 1 deletion
@@ -926,6 +926,13 @@ If any of init_t, exit_t or alpha_t is specified, the user schedule, with a fixe
 
     **Default:** ``move_block_type``
 
+.. option:: --place_quench_only {on | off}
+    
+    If this option is set to ``on``, the placement will skip the annealing phase and only perform the placement quench.
+    This option is useful when the quality of initial placement is good enough and there is no need to perform the 
+    annealing phase.
+
+    **Default:** ``off``
 
 
 .. option:: --placer_debug_block <int>
@@ -1188,15 +1195,43 @@ Analytical Placement is generally split into three stages:
 
     Analytical Placement is experimental and under active development.
 
-.. option:: --ap_global_placer {quadratic-bipartitioning-lookahead | quadratic-flowbased-lookahead}
+.. option:: --ap_analytical_solver {qp-hybrid | lp-b2b}
+
+    Controls which Analytical Solver the Global Placer will use in the AP Flow.
+    The Analytical Solver solves for a placement which optimizes some objective
+    function, ignorant of the FPGA legality constraints. This provides a
+    "lower-bound" solution. The Global Placer will legalize this solution and
+    feed it back to the analytical solver to make its solution more legal.
+
+    * ``qp-hybrid`` Solves for a placement that minimizes the quadratic HPWL of
+      the flat placement using a hybrid clique/star net model (as described in
+      FastPlace :cite:`Viswanathan2005_FastPlace`).
+      Uses the legalized solution as anchor-points to pull the solution to a
+      more legal solution (similar to the approach from SimPL :cite:`Kim2013_SimPL`).
+
+    * ``lp-b2b`` Solves for a placement that minimizes the linear HPWL of the
+      flat placement using the Bound2Bound net model (as described in Kraftwerk2 :cite:`Spindler2008_Kraftwerk2`).
+      Uses the legalized solution as anchor-points to pull the solution to a
+      more legal solution (similar to the approach from SimPL :cite:`Kim2013_SimPL`).
+
+    **Default:** ``lp-b2b``
+
+.. option:: --ap_partial_legalizer {bipartitioning | flow-based}
 
-    Controls which Global Placer to use in the AP Flow.
+    Controls which Partial Legalizer the Global Placer will use in the AP Flow.
+    The Partial Legalizer legalizes a placement generated by an Analytical Solver.
+    It is used within the Global Placer to guide the solver to a more legal
+    solution.
 
-    * ``quadratic-bipartitioning-lookahead`` Use a Global Placer which uses a quadratic solver and a bi-partitioning lookahead legalizer. Anchor points are used to spread the solved solution to the legalized solution.
+    * ``bipartitioning`` Creates minimum windows around over-dense regions of
+      the device and bi-partitions the atoms in these windows such that the
+      region is no longer over-dense and the atoms are in tiles that they can
+      be placed into.
 
-    * ``quadratic-flowbased-lookahead`` Use a Global Placer which uses a quadratic solver and a multi-commodity-flow-based lookahead legalizer. Anchor points are used to spread the solved solution to the legalized solution.
+    * ``flow-based`` Flows atoms from regions that are overfilled to regions that
+      are underfilled.
 
-    **Default:** ``quadratic-bipartitioning-lookahead``
+    **Default:** ``bipartitioning``
 
 .. option:: --ap_full_legalizer {naive | appack}
 
 
@@ -436,3 +436,46 @@ @inproceedings{kosar2024parallel
   booktitle={The 23rd International Conference on Field-Programmable Technology},
   year={2024}
 }
+
+@ARTICLE{Viswanathan2005_FastPlace,
+    author={Viswanathan, N. and Chu, C.C.-N.},
+    journal={IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
+    title={{FastPlace}: efficient analytical placement using cell shifting, iterative local refinement, and a hybrid net model},
+    year={2005},
+    volume={24},
+    number={5},
+    month=may,
+    pages={722-733},
+    keywords={Clustering algorithms;Partitioning algorithms;Algorithm design and analysis;Integrated circuit interconnections;Large-scale systems;Minimization;Delay;Simulated annealing;Iterative algorithms;Acceleration;Analytical placement;computer-aided design;net models;standard cell placement},
+    doi={10.1109/TCAD.2005.846365}
+}
+
+@article{Kim2013_SimPL,
+    author = {Kim, Myung-Chul and Lee, Dong-Jin and Markov, Igor L.},
+    journal = {Commun. ACM},
+    title = {{SimPL}: an algorithm for placing {VLSI} circuits},
+    year = {2013},
+    issue_date = {June 2013},
+    publisher = {Association for Computing Machinery},
+    address = {New York, NY, USA},
+    volume = {56},
+    number = {6},
+    issn = {0001-0782},
+    doi = {10.1145/2461256.2461279},
+    month = jun,
+    pages = {105–113},
+    numpages = {9}
+}
+
+@ARTICLE{Spindler2008_Kraftwerk2,
+    author={Spindler, Peter and Schlichtmann, Ulf and Johannes, Frank M.},
+    journal={IEEE Transactions on Computer-Aided Design of Integrated Circuits and Systems},
+    title={Kraftwerk2—A Fast Force-Directed Quadratic Placement Approach Using an Accurate Net Model},
+    year={2008},
+    volume={27},
+    number={8},
+    month=aug,
+    pages={1398-1411},
+    keywords={Cost function;Central Processing Unit;Runtime;Quality control;Convergence;Computational efficiency;Integrated circuit synthesis;Stochastic processes;Circuit simulation;Bound2Bound;force-directed;half-perimeter wirelength (HPWL);Kraftwerk2;quadratic placement;Kraftwerk2;force-directed;quadratic placement;Bound2Bound;HPWL},
+    doi={10.1109/TCAD.2008.925783}
+}
@@ -0,0 +1,159 @@
+#pragma once
+
+/** 
+ * @file vtr_thread_pool.h
+ * @brief A generic thread pool for parallel task execution
+ */
+
+#include <thread>
+#include <queue>
+#include <mutex>
+#include <condition_variable>
+#include <memory>
+#include <atomic>
+#include <functional>
+#include <cstddef>
+#include <vector>
+#include "vtr_log.h"
+#include "vtr_time.h"
+
+namespace vtr {
+
+/**
+ * A thread pool for parallel task execution. It is a naive
+ * implementation which uses a queue for each thread and assigns
+ * tasks in a round robin fashion.
+ *
+ * Tasks are never migrated between queues: a task waits behind whatever
+ * was previously assigned to the same worker, even if other workers are idle.
+ *
+ * A pool constructed with zero threads owns no workers; schedule_work()
+ * then runs each task immediately on the calling thread.
+ *
+ * If a task throws, the error is logged and the exception is rethrown out
+ * of the worker thread's entry function (an exception escaping a
+ * std::thread function results in std::terminate).
+ *
+ * Example usage:
+ *
+ * vtr::thread_pool pool(4);
+ * pool.schedule_work([]{
+ *     // Task body
+ * });
+ * pool.wait_for_all(); // There's no API to wait for a single task
+ */
+class thread_pool {
+  private:
+    /* Per-worker state. Heap-allocated (held via unique_ptr below) so that
+     * its address stays stable while the worker thread points at it. */
+    struct ThreadData {
+        std::thread thread;
+        /* Per-thread task queue */
+        std::queue<std::function<void()>> task_queue;
+
+        /* Threads wait on cv for a stop signal or a new task
+         * queue_mutex is required for condition variable and guards
+         * both task_queue and stop */
+        std::mutex queue_mutex;
+        std::condition_variable cv;
+        bool stop = false;
+    };
+
+    /* Container for per-worker data */
+    std::vector<std::unique_ptr<ThreadData>> threads;
+    /* Used for round-robin scheduling */
+    std::atomic<size_t> next_thread{0};
+    /* Number of scheduled tasks that have not finished yet; used for wait_for_all */
+    std::atomic<size_t> active_tasks{0};
+
+    /* Condition variable for wait_for_all */
+    std::mutex completion_mutex;
+    std::condition_variable completion_cv;
+
+  public:
+    /**
+     * @brief Creates the pool and starts its worker threads.
+     *
+     * @param thread_count  Number of worker threads to start. May be zero,
+     *                      in which case tasks run on the scheduling thread.
+     */
+    thread_pool(size_t thread_count) {
+        threads.reserve(thread_count);
+
+        for (size_t i = 0; i < thread_count; i++) {
+            auto thread_data = std::make_unique<ThreadData>();
+
+            /* Take the raw pointer BEFORE spawning the thread and capture it
+             * by value. The owning unique_ptr is a loop-local which is moved
+             * into `threads` below and then destroyed, so the worker must not
+             * refer to it. */
+            ThreadData* td = thread_data.get();
+
+            thread_data->thread = std::thread([td]() {
+                while (true) {
+                    std::function<void()> task;
+
+                    { /* Wait until a task is available or stop signal is received */
+                        std::unique_lock<std::mutex> lock(td->queue_mutex);
+
+                        td->cv.wait(lock, [td]() {
+                            return td->stop || !td->task_queue.empty();
+                        });
+
+                        /* Only exit once the queue is drained, so tasks that
+                         * were queued before the stop request still run. */
+                        if (td->stop && td->task_queue.empty()) {
+                            return;
+                        }
+
+                        /* Fetch a task from the queue */
+                        task = std::move(td->task_queue.front());
+                        td->task_queue.pop();
+                    }
+
+                    /* The task runs outside the lock so new work can be
+                     * queued for this worker in the meantime.
+                     * NOTE(review): task_timer is not read here -- confirm
+                     * whether it is still needed. */
+                    vtr::Timer task_timer;
+                    task();
+                }
+            });
+
+            threads.push_back(std::move(thread_data));
+        }
+    }
+
+    /**
+     * @brief Queues a callable for execution on one of the worker threads.
+     *
+     * Workers are picked in round-robin order. If the pool has no worker
+     * threads, the callable is invoked immediately on the calling thread.
+     *
+     * @param f  Callable invoked with no arguments. A copy (or moved-from
+     *           instance) is stored inside the pool until it has run.
+     */
+    template<typename F>
+    void schedule_work(F&& f) {
+        /* No workers: avoid the modulo-by-zero below and run the task inline. */
+        if (threads.empty()) {
+            f();
+            return;
+        }
+
+        active_tasks++;
+
+        /* Round-robin thread assignment */
+        size_t thread_idx = (next_thread++) % threads.size();
+        ThreadData* thread_data = threads[thread_idx].get();
+
+        auto task = [this, f = std::forward<F>(f)]() {
+            /* NOTE(review): task_timer is not read here -- confirm whether
+             * it is still needed. */
+            vtr::Timer task_timer;
+
+            try {
+                f();
+            } catch (const std::exception& e) {
+                /* std::thread::id is not an integer type; hash it so that the
+                 * value matches the %zu conversion. */
+                VTR_LOG_ERROR("Thread %zu failed task with error: %s\n",
+                              std::hash<std::thread::id>{}(std::this_thread::get_id()), e.what());
+                throw;
+            } catch (...) {
+                VTR_LOG_ERROR("Thread %zu failed task with unknown error\n",
+                              std::hash<std::thread::id>{}(std::this_thread::get_id()));
+                throw;
+            }
+
+            /* Decrement and notify while holding completion_mutex. Without
+             * the lock, a thread in wait_for_all() could evaluate its
+             * predicate (count != 0), then miss this notification before it
+             * actually blocks, and sleep forever. */
+            {
+                std::lock_guard<std::mutex> lock(completion_mutex);
+                size_t remaining = --active_tasks;
+                if (remaining == 0) {
+                    completion_cv.notify_all();
+                }
+            }
+        };
+
+        /* Queue new task */
+        {
+            std::lock_guard<std::mutex> lock(thread_data->queue_mutex);
+            thread_data->task_queue.push(std::move(task));
+        }
+        thread_data->cv.notify_one();
+    }
+
+    /**
+     * @brief Blocks the caller until every task scheduled so far has finished.
+     *
+     * Returns immediately if no task is pending.
+     */
+    void wait_for_all() {
+        std::unique_lock<std::mutex> lock(completion_mutex);
+        completion_cv.wait(lock, [this]() { return active_tasks == 0; });
+    }
+
+    /**
+     * @brief Signals every worker to stop and joins them.
+     *
+     * Workers finish the tasks already in their queues before exiting, so
+     * destruction blocks until all queued work has run.
+     */
+    ~thread_pool() {
+        /* Stop all threads */
+        for (auto& thread_data : threads) {
+            {
+                std::lock_guard<std::mutex> lock(thread_data->queue_mutex);
+                thread_data->stop = true;
+            }
+            thread_data->cv.notify_one();
+        }
+
+        for (auto& thread_data : threads) {
+            if (thread_data->thread.joinable()) {
+                thread_data->thread.join();
+            }
+        }
+    }
+};
+
+} // namespace vtr
@@ -131,7 +131,8 @@ static PartialPlacement run_global_placer(const t_ap_opts& ap_opts,
         return p_placement;
     } else {
         // Run the Global Placer
-        std::unique_ptr<GlobalPlacer> global_placer = make_global_placer(ap_opts.global_placer_type,
+        std::unique_ptr<GlobalPlacer> global_placer = make_global_placer(ap_opts.analytical_solver_type,
+                                                                         ap_opts.partial_legalizer_type,
                                                                          ap_netlist,
                                                                          prepacker,
                                                                          atom_nlist,