diff --git a/libs/libarchfpga/src/physical_types_util.h b/libs/libarchfpga/src/physical_types_util.h index e27ba096b54..4d9c3013682 100644 --- a/libs/libarchfpga/src/physical_types_util.h +++ b/libs/libarchfpga/src/physical_types_util.h @@ -171,7 +171,7 @@ std::vector block_type_class_index_to_pin_names(t_physical_tile_typ bool is_flat); ///@brief Returns the physical tile type matching a given physical tile type name, or nullptr (if not found) -t_physical_tile_type_ptr find_tile_type_by_name(std::string name, const std::vector& types); +t_physical_tile_type_ptr find_tile_type_by_name(const std::string& name, const std::vector& types); int find_pin_class(t_physical_tile_type_ptr type, std::string port_name, int pin_index_in_port, e_pin_type pin_type); diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index eecec4d39ce..a93b648f87b 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -737,6 +737,7 @@ static void SetupNocOpts(const t_options& Options, t_noc_opts* NocOpts) { NocOpts->noc_placement_weighting = Options.noc_placement_weighting; NocOpts->noc_latency_constraints_weighting = Options.noc_latency_constraints_weighting; NocOpts->noc_latency_weighting = Options.noc_latency_weighting; + NocOpts->noc_congestion_weighting = Options.noc_congestion_weighting; NocOpts->noc_swap_percentage = Options.noc_swap_percentage; NocOpts->noc_placement_file_name = Options.noc_placement_file_name; diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index 904ee63f0d4..dc3ef848b47 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -794,6 +794,7 @@ static void ShowNocOpts(const t_noc_opts& NocOpts) { VTR_LOG("NocOpts.noc_placement_weighting: %f\n", NocOpts.noc_placement_weighting); VTR_LOG("NocOpts.noc_latency_constraints_weighting: %f\n", NocOpts.noc_latency_constraints_weighting); VTR_LOG("NocOpts.noc_latency_weighting: %f\n", NocOpts.noc_latency_weighting); + VTR_LOG("NocOpts.noc_congestion_weighting: %f\n", NocOpts.noc_congestion_weighting); VTR_LOG("NocOpts.noc_swap_percentage: %d%%\n", NocOpts.noc_swap_percentage); VTR_LOG("NocOpts.noc_routing_algorithm: %s\n", NocOpts.noc_placement_file_name.c_str()); VTR_LOG("\n"); diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp index b1916852a34..186193744ce 100644 --- a/vpr/src/base/place_and_route.cpp +++ b/vpr/src/base/place_and_route.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include "vtr_util.h" #include "vtr_memory.h" @@ -425,7 +426,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, * is used to determine if the channel width should be rounded to an * even number. 
*/ -t_chan_width init_chan(int cfactor, t_chan_width_dist chan_width_dist, t_graph_type graph_directionality) { +t_chan_width init_chan(int cfactor, const t_chan_width_dist& chan_width_dist, t_graph_type graph_directionality) { auto& device_ctx = g_vpr_ctx.mutable_device(); auto& grid = device_ctx.grid; @@ -460,19 +461,15 @@ t_chan_width init_chan(int cfactor, t_chan_width_dist chan_width_dist, t_graph_t } } - chan_width.max = 0; - chan_width.x_max = chan_width.y_max = INT_MIN; - chan_width.x_min = chan_width.y_min = INT_MAX; - for (size_t i = 0; i < grid.height(); ++i) { - chan_width.x_max = std::max(chan_width.x_max, chan_width.x_list[i]); - chan_width.x_min = std::min(chan_width.x_min, chan_width.x_list[i]); - } - chan_width.max = std::max(chan_width.max, chan_width.x_max); - for (size_t i = 0; i < grid.width(); ++i) { - chan_width.y_max = std::max(chan_width.y_max, chan_width.y_list[i]); - chan_width.y_min = std::min(chan_width.y_min, chan_width.y_list[i]); - } - chan_width.max = std::max(chan_width.max, chan_width.y_max); + auto minmax = std::minmax_element(chan_width.x_list.begin(), chan_width.x_list.end()); + chan_width.x_min = *minmax.first; + chan_width.x_max = *minmax.second; + + minmax = std::minmax_element(chan_width.y_list.begin(), chan_width.y_list.end()); + chan_width.y_min = *minmax.first; + chan_width.y_max = *minmax.second; + + chan_width.max = std::max(chan_width.x_max, chan_width.y_max); #ifdef VERBOSE VTR_LOG("\n"); diff --git a/vpr/src/base/place_and_route.h b/vpr/src/base/place_and_route.h index 3ec8ca9030c..7a59fa02795 100644 --- a/vpr/src/base/place_and_route.h +++ b/vpr/src/base/place_and_route.h @@ -40,7 +40,9 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, std::shared_ptr delay_calc, bool is_flat); -t_chan_width init_chan(int cfactor, t_chan_width_dist chan_width_dist, t_graph_type graph_directionality); +t_chan_width init_chan(int cfactor, + const t_chan_width_dist& chan_width_dist, + t_graph_type graph_directionality); void post_place_sync(); diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index e75780e51e5..7765bf5acf6 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -62,7 +62,7 @@ struct ParseOnOff { }; struct ParseArchFormat { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "vtr") conv_value.set_value(e_arch_format::VTR); @@ -94,7 +94,7 @@ struct ParseArchFormat { } }; struct ParseCircuitFormat { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "auto") conv_value.set_value(e_circuit_format::AUTO); @@ -134,7 +134,7 @@ struct ParseCircuitFormat { } }; struct ParseRoutePredictor { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "safe") conv_value.set_value(SAFE); @@ -170,7 +170,7 @@ struct ParseRoutePredictor { }; struct ParseRouterAlgorithm { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "parallel") conv_value.set_value(PARALLEL); @@ -201,7 +201,7 @@ struct ParseRouterAlgorithm { }; struct ParseNodeReorderAlgorithm { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "none") conv_value.set_value(DONT_REORDER); @@ -236,7 +236,7 @@ struct 
ParseNodeReorderAlgorithm { }; struct RouteBudgetsAlgorithm { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "minimax") conv_value.set_value(MINIMAX); @@ -276,7 +276,7 @@ struct RouteBudgetsAlgorithm { }; struct ParseRouteType { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "global") conv_value.set_value(GLOBAL); @@ -307,7 +307,7 @@ struct ParseRouteType { }; struct ParseBaseCost { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "delay_normalized") conv_value.set_value(DELAY_NORMALIZED); @@ -358,7 +358,7 @@ struct ParseBaseCost { }; struct ParsePlaceDeltaDelayAlgorithm { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "astar") conv_value.set_value(e_place_delta_delay_algorithm::ASTAR_ROUTE); @@ -389,7 +389,7 @@ struct ParsePlaceDeltaDelayAlgorithm { }; struct ParsePlaceAlgorithm { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "bounding_box") { conv_value.set_value(BOUNDING_BOX_PLACE); @@ -431,7 +431,7 @@ struct ParsePlaceAlgorithm { }; struct ParsePlaceBoundingBox { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "auto_bb") { conv_value.set_value(AUTO_BB); @@ -466,7 +466,7 @@ struct ParsePlaceBoundingBox { }; struct ParsePlaceAgentAlgorithm { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "e_greedy") conv_value.set_value(E_GREEDY); @@ -497,7 +497,7 @@ struct ParsePlaceAgentAlgorithm { }; struct ParsePlaceAgentSpace { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "move_type") conv_value.set_value(e_agent_space::MOVE_TYPE); @@ -528,7 +528,7 @@ struct ParsePlaceAgentSpace { }; struct ParseFixPins { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "free") conv_value.set_value(FREE); @@ -559,7 +559,7 @@ struct ParseFixPins { }; struct ParseClusterSeed { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "timing") conv_value.set_value(e_cluster_seed::TIMING); @@ -606,7 +606,7 @@ struct ParseClusterSeed { }; struct ParseConstantNetMethod { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "global") conv_value.set_value(CONSTANT_NET_GLOBAL); @@ -637,7 +637,7 @@ struct ParseConstantNetMethod { }; struct ParseTimingReportDetail { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "netlist") conv_value.set_value(e_timing_report_detail::NETLIST); @@ -677,7 +677,7 @@ struct ParseTimingReportDetail { }; struct ParseClockModeling { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "ideal") conv_value.set_value(IDEAL_CLOCK); @@ -715,7 +715,7 @@ struct ParseClockModeling { }; struct 
ParseUnrelatedClustering { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "on") conv_value.set_value(e_unrelated_clustering::ON); @@ -753,7 +753,7 @@ struct ParseUnrelatedClustering { }; struct ParseBalanceBlockTypeUtil { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "on") conv_value.set_value(e_balance_block_type_util::ON); @@ -791,7 +791,7 @@ struct ParseBalanceBlockTypeUtil { }; struct ParseConstGenInference { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "none") conv_value.set_value(e_const_gen_inference::NONE); @@ -829,7 +829,7 @@ struct ParseConstGenInference { }; struct ParseIncrRerouteDelayRipup { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "on") conv_value.set_value(e_incr_reroute_delay_ripup::ON); @@ -867,7 +867,7 @@ struct ParseIncrRerouteDelayRipup { }; struct ParseRouteBBUpdate { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "static") conv_value.set_value(e_route_bb_update::STATIC); @@ -944,7 +944,7 @@ struct ParseRouterLookahead { }; struct ParsePlaceDelayModel { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "simple") { conv_value.set_value(PlaceDelayModelType::SIMPLE); @@ -982,7 +982,7 @@ struct ParsePlaceDelayModel { }; struct ParseReducer { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "min") conv_value.set_value(e_reducer::MIN); @@ -1025,7 +1025,7 @@ struct ParseReducer { }; struct ParseRouterFirstIterTiming { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "all_critical") conv_value.set_value(e_router_initial_timing::ALL_CRITICAL); @@ -1056,7 +1056,7 @@ struct ParseRouterFirstIterTiming { }; struct ParseRouterHeap { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "binary") conv_value.set_value(e_heap_type::BINARY_HEAP); @@ -1087,7 +1087,7 @@ struct ParseRouterHeap { }; struct ParseCheckRoute { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "off") conv_value.set_value(e_check_route_option::OFF); @@ -1122,7 +1122,7 @@ struct ParseCheckRoute { }; struct ParsePlaceEfforScaling { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "circuit") conv_value.set_value(e_place_effort_scaling::CIRCUIT); @@ -1153,7 +1153,7 @@ struct ParsePlaceEfforScaling { }; struct ParseTimingUpdateType { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "auto") conv_value.set_value(e_timing_update_type::AUTO); @@ -1188,7 +1188,7 @@ struct ParseTimingUpdateType { }; struct ParsePostSynthNetlistUnconnInputHandling { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "unconnected") 
conv_value.set_value(e_post_synth_netlist_unconn_handling::UNCONNECTED); @@ -1227,7 +1227,7 @@ struct ParsePostSynthNetlistUnconnInputHandling { }; struct ParsePostSynthNetlistUnconnOutputHandling { - ConvertedValue from_str(std::string str) { + ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; if (str == "unconnected") conv_value.set_value(e_post_synth_netlist_unconn_handling::UNCONNECTED); @@ -2813,22 +2813,40 @@ argparse::ArgumentParser create_arg_parser(std::string prog_name, t_options& arg noc_grp.add_argument(args.noc_placement_weighting, "--noc_placement_weighting") .help( "Controls the importance of the NoC placement parameters relative to timing and wirelength of the design." - "This value can be >=0, where 0 would mean the placement is based solely on timing and wirelength, a value of 1 would mean noc placement is considered equal to timing and wirelength and a value greater than 1 would mean the placement is increasingly dominated by NoC parameters.") - .default_value("0.6") + "This value can be >=0, where 0 would mean the placement is based solely on timing and wirelength." + "A value of 1 would mean noc placement is considered equal to timing and wirelength" + "A value greater than 1 would mean the placement is increasingly dominated by NoC parameters.") + .default_value("5.0") .show_in(argparse::ShowIn::HELP_ONLY); noc_grp.add_argument(args.noc_latency_constraints_weighting, "--noc_latency_constraints_weighting") .help( - "Controls the importance of meeting all the NoC traffic flow latency constraints." - "This value can be >=0, where 0 would mean the latency constraints have no relevance to placement, a value of 1 would mean the latency constraints are weighted equally to the sum of other placement cost components and a value greater than 1 would mean the placement is increasingly dominated by meeting the latency constraints of the traffic flows.") - .default_value("1") + "Controls the importance of meeting all the NoC traffic flow latency constraints.\n" + "This value can be >=0, where 0 would mean the latency constraints have no relevance to placement.\n" + "Other positive numbers specify the importance of meeting latency constraints to other NoC-related cost terms.\n" + "Weighting factors for NoC-related cost terms are normalized internally. Therefore, their absolute values are not important, and" + "only their relative ratios determine the importance of each cost term.") + .default_value("0.6") .show_in(argparse::ShowIn::HELP_ONLY); noc_grp.add_argument(args.noc_latency_weighting, "--noc_latency_weighting") .help( - "Controls the importance of reducing the latencies of the NoC traffic flows." - "This value can be >=0, where 0 would mean the latencies have no relevance to placement, a value of 1 would mean the latencies are weighted equally to the sum of other placement cost components and a value greater than 1 would mean the placement is increasingly dominated by reducing the latencies of the traffic flows.") - .default_value("0.05") + "Controls the importance of reducing the latencies of the NoC traffic flows.\n" + "This value can be >=0, where 0 would mean the latencies have no relevance to placement.\n" + "Other positive numbers specify the importance of minimizing aggregate latency to other NoC-related cost terms.\n" + "Weighting factors for NoC-related cost terms are normalized internally. 
Therefore, their absolute values are not important, and" + "only their relative ratios determine the importance of each cost term.") + .default_value("0.02") + .show_in(argparse::ShowIn::HELP_ONLY); + + noc_grp.add_argument(args.noc_congestion_weighting, "--noc_congestion_weighting") + .help( + "Controls the importance of reducing the congestion of the NoC links.\n" + "This value can be >=0, where 0 would mean the congestion has no relevance to placement.\n" + "Other positive numbers specify the importance of minimizing congestion to other NoC-related cost terms.\n" + "Weighting factors for NoC-related cost terms are normalized internally. Therefore, their absolute values are not important, and" + "only their relative ratios determine the importance of each cost term.") + .default_value("0.00") .show_in(argparse::ShowIn::HELP_ONLY); noc_grp.add_argument(args.noc_swap_percentage, "--noc_swap_percentage") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index d1edc5ef2b2..e6476ba151e 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -155,6 +155,7 @@ struct t_options { argparse::ArgValue noc_placement_weighting; argparse::ArgValue noc_latency_constraints_weighting; argparse::ArgValue noc_latency_weighting; + argparse::ArgValue noc_congestion_weighting; argparse::ArgValue noc_swap_percentage; argparse::ArgValue noc_placement_file_name; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 1e4684ae683..47733286088 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -251,7 +251,7 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a * Initialize the functions names for which VPR_ERRORs * are demoted to VTR_LOG_WARNs */ - for (std::string func_name : vtr::split(options->disable_errors, std::string(":"))) { + for (const std::string& func_name : vtr::split(options->disable_errors, std::string(":"))) { map_error_activation_status(func_name); } @@ -272,7 +272,7 @@ void vpr_init_with_options(const t_options* options, t_vpr_setup* vpr_setup, t_a } set_noisy_warn_log_file(warn_log_file); - for (std::string func_name : vtr::split(warn_functions, std::string(":"))) { + for (const std::string& func_name : vtr::split(warn_functions, std::string(":"))) { add_warnings_to_suppress(func_name); } @@ -559,12 +559,12 @@ void vpr_setup_noc(const t_vpr_setup& vpr_setup, const t_arch& arch) { * @param noc_routing_algorithm_name A user provided string that identifies a * NoC routing algorithm */ -void vpr_setup_noc_routing_algorithm(std::string noc_routing_algorithm_name) { +void vpr_setup_noc_routing_algorithm(const std::string& noc_routing_algorithm_name) { // Need to be abke to modify the NoC context, since we will be adding the // newly created routing algorithm to it auto& noc_ctx = g_vpr_ctx.mutable_noc(); - noc_ctx.noc_flows_router = NocRoutingAlgorithmCreator().create_routing_algorithm(noc_routing_algorithm_name); + noc_ctx.noc_flows_router = NocRoutingAlgorithmCreator::create_routing_algorithm(noc_routing_algorithm_name); return; } @@ -1085,7 +1085,7 @@ static void get_intercluster_switch_fanin_estimates(const t_vpr_setup& vpr_setup auto type = find_most_common_tile_type(grid); /* get Fc_in/out for most common block (e.g. logic blocks) */ - VTR_ASSERT(type->fc_specs.size() > 0); + VTR_ASSERT(!type->fc_specs.empty()); //Estimate the maximum Fc_in/Fc_out @@ -1208,10 +1208,7 @@ static void free_routing() { /** * @brief handles the deletion of NoC related datastructures. 
*/ -static void free_noc() { - auto& noc_ctx = g_vpr_ctx.mutable_noc(); - delete noc_ctx.noc_flows_router; -} +static void free_noc() {} void vpr_free_vpr_data_structures(t_arch& Arch, t_vpr_setup& vpr_setup) { diff --git a/vpr/src/base/vpr_api.h b/vpr/src/base/vpr_api.h index 15509be1115..b4c89e25051 100644 --- a/vpr/src/base/vpr_api.h +++ b/vpr/src/base/vpr_api.h @@ -144,7 +144,7 @@ void vpr_close_graphics(const t_vpr_setup& vpr_setup); void vpr_setup_clock_networks(t_vpr_setup& vpr_setup, const t_arch& Arch); void vpr_setup_noc(const t_vpr_setup& vpr_setup, const t_arch& arch); -void vpr_setup_noc_routing_algorithm(std::string noc_routing_algorithm_name); +void vpr_setup_noc_routing_algorithm(const std::string& noc_routing_algorithm_name); void vpr_free_vpr_data_structures(t_arch& Arch, t_vpr_setup& vpr_setup); void vpr_free_all(t_arch& Arch, diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 6a07f367e13..18420590f2e 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -546,7 +546,7 @@ struct NocContext : public Context { * * This is created from a user supplied command line option "--noc_routing_algorithm" */ - NocRouting* noc_flows_router; + std::unique_ptr noc_flows_router; }; /** diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 94c0502a836..185c1c4229f 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1498,9 +1498,11 @@ struct t_noc_opts { std::string noc_flows_file; ///= 0.0); + return congested_bandwidth; +} + +double NocLink::get_congested_bandwidth_ratio() const { + double congested_bw = get_congested_bandwidth(); + double congested_bw_ratio = congested_bw / get_bandwidth(); + + VTR_ASSERT(congested_bw_ratio >= 0.0); + return congested_bw_ratio; +} + +NocLinkId NocLink::get_link_id() const { + return id; +} + +NocLink::operator NocLinkId() const { + return get_link_id(); } \ No newline at end of file diff --git a/vpr/src/noc/noc_link.h b/vpr/src/noc/noc_link.h index dee19cc676b..2aa5d55cd67 100644 --- a/vpr/src/noc/noc_link.h +++ b/vpr/src/noc/noc_link.h @@ -43,14 +43,17 @@ class NocLink { private: + NocLinkId id; + // the two routers that are connected by this link NocRouterId source_router; /*!< The router which uses this link as an outgoing edge*/ NocRouterId sink_router; /*!< The router which uses this link as an incoming edge*/ double bandwidth_usage; /*!< Represents the bandwidth of the data being transmitted on the link. Units in bits-per-second(bps)*/ + double bandwidth; /*!< Represents the maximum bits per second that can be transmitted over the link without causing congestion*/ public: - NocLink(NocRouterId source_router, NocRouterId sink_router); + NocLink(NocLinkId link_id, NocRouterId source_router, NocRouterId sink_router, double bw); // getters @@ -70,10 +73,36 @@ class NocLink { /** * @brief Provides the size of the data (bandwidth) being currently transmitted using the link. - * @return A numeric value of the bandwidth of the link + * @return A numeric value of the bandwidth usage of the link */ double get_bandwidth_usage(void) const; + /** + * @brief Returns the maximum bandwidth that the link can carry without congestion. + * @return A numeric value of the bandwidth capacity of the link + */ + double get_bandwidth(void) const; + + /** + * @brief Calculates the extent to which the current bandwidth utilization + * exceeds the link capacity. Any positive value means the link is congested. 
+ * @return A numeric value of the bandwidth over-utilization in the link + */ + double get_congested_bandwidth(void) const; + + /** + * @brief Computes the congested bandwidth to bandwidth capacity ratio. + * @return The congested bandwidth to bandwidth capacity of the link. + */ + double get_congested_bandwidth_ratio() const; + + /** + * @brief Returns the unique link ID. The ID can be used to index + * vtr::vector instances. + * @return The unique ID for the link + */ + NocLinkId get_link_id() const; + // setters /** * @brief Can be used to set the source router of the link to a different router. @@ -90,13 +119,29 @@ class NocLink { void set_sink_router(NocRouterId sink); /** - * @brief Can modify the bandwidth of the link. It is expected that when the NoC is being placed + * @brief Can modify the bandwidth usage of the link. It is expected that when the NoC is being placed * the traffic flows will be re-routed multiple times. So the links will end up being used and un-used * by different traffic flows and the bandwidths of the links will correspondingly change. This function * can be used to make those changes - * @param new_bandwidth_usage The new value of the bandwidth of the link + * @param new_bandwidth_usage The new value of the bandwidth usage of the link */ void set_bandwidth_usage(double new_bandwidth_usage); + + /** + * @brief Sets the bandwidth capacity of the link. This function should be used when + * global NoC data structures are created and populated. The bandwidth capacity is used + * along with bandwidth_usage to measure congestion. + * @param new_bandwidth The new value of the bandwidth of the link + */ + void set_bandwidth(double new_bandwidth); + + + /** + * @brief Returns the unique link ID. The ID can be used to index + * vtr::vector instances. 
+ * @return The unique ID for the link + */ + operator NocLinkId() const; }; #endif \ No newline at end of file diff --git a/vpr/src/noc/noc_routing_algorithm_creator.cpp b/vpr/src/noc/noc_routing_algorithm_creator.cpp index 0252f1fefca..ddbd0ebb9d7 100644 --- a/vpr/src/noc/noc_routing_algorithm_creator.cpp +++ b/vpr/src/noc/noc_routing_algorithm_creator.cpp @@ -2,13 +2,13 @@ #include "noc_routing_algorithm_creator.h" #include "vpr_error.h" -NocRouting* NocRoutingAlgorithmCreator::create_routing_algorithm(std::string routing_algorithm_name) { - NocRouting* noc_routing_algorithm = nullptr; +std::unique_ptr NocRoutingAlgorithmCreator::create_routing_algorithm(const std::string& routing_algorithm_name) { + std::unique_ptr noc_routing_algorithm; if (routing_algorithm_name == "xy_routing") { - noc_routing_algorithm = new XYRouting(); + noc_routing_algorithm = std::make_unique(); } else if (routing_algorithm_name == "bfs_routing") { - noc_routing_algorithm = new BFSRouting(); + noc_routing_algorithm = std::make_unique(); } else { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "The provided NoC routing algorithm '%s' is not supported.", routing_algorithm_name.c_str()); } diff --git a/vpr/src/noc/noc_routing_algorithm_creator.h b/vpr/src/noc/noc_routing_algorithm_creator.h index bca7b98abdc..b4361d95d33 100644 --- a/vpr/src/noc/noc_routing_algorithm_creator.h +++ b/vpr/src/noc/noc_routing_algorithm_creator.h @@ -18,6 +18,7 @@ */ #include +#include #include "noc_routing.h" #include "xy_routing.h" @@ -39,7 +40,7 @@ class NocRoutingAlgorithmCreator { * NoC routing algorithm * @return NocRouting* A reference to the created NoC routing algorithm */ - NocRouting* create_routing_algorithm(std::string routing_algorithm_name); + static std::unique_ptr create_routing_algorithm(const std::string& routing_algorithm_name); }; #endif diff --git a/vpr/src/noc/noc_storage.cpp b/vpr/src/noc/noc_storage.cpp index 70c92878f82..8438838c1f9 100644 --- a/vpr/src/noc/noc_storage.cpp +++ b/vpr/src/noc/noc_storage.cpp @@ -56,6 +56,19 @@ const NocLink& NocStorage::get_single_noc_link(NocLinkId id) const { return link_storage[id]; } +NocLinkId NocStorage::get_single_noc_link_id(NocRouterId src_router, NocRouterId dst_router) const { + NocLinkId link_id = NocLinkId::INVALID(); + + for (const auto& link : link_storage) { + if (link.get_source_router() == src_router && link.get_sink_router() == dst_router) { + link_id = link.get_link_id(); + break; + } + } + + return link_id; +} + NocLink& NocStorage::get_single_mutable_noc_link(NocLinkId id) { return link_storage[id]; } @@ -100,10 +113,14 @@ void NocStorage::add_router(int id, int grid_position_x, int grid_posistion_y, i void NocStorage::add_link(NocRouterId source, NocRouterId sink) { VTR_ASSERT_MSG(!built_noc, "NoC already built, cannot modify further."); - link_storage.emplace_back(source, sink); - // the newly added link was added to the back of the list, so we can get the id as the last element in the list - NocLinkId added_link_id((int)link_storage.size() - 1); + // the new link will be added to the back of the list, + // so we can use the total number of links added so far as id + NocLinkId added_link_id((int)link_storage.size()); + + double link_bandwidth = get_noc_link_bandwidth(); + link_storage.emplace_back(added_link_id, source, sink, link_bandwidth); + router_link_list[source].push_back(added_link_id); return; @@ -111,6 +128,12 @@ void NocStorage::add_link(NocRouterId source, NocRouterId sink) { void NocStorage::set_noc_link_bandwidth(double link_bandwidth) { 
noc_link_bandwidth = link_bandwidth; + + // Iterate over all links and set their bandwidth + for (auto& link : link_storage) { + link.set_bandwidth(noc_link_bandwidth); + } + return; } diff --git a/vpr/src/noc/noc_storage.h b/vpr/src/noc/noc_storage.h index f35f0121eb2..637d9f52126 100644 --- a/vpr/src/noc/noc_storage.h +++ b/vpr/src/noc/noc_storage.h @@ -24,13 +24,13 @@ * * Link * ---- - * A link is a component of the NoC ans is defined by the + * A link is a component of the NoC and is defined by the * NocLink class. Links are connections between two routers. * Links are used by routers to communicate with other routers * in the NoC. They can be thought of as edges in a graph. Links * have a source router where they exit from and sink router where * they enter. It is important to note that the links are not - * unidirectional, the legal way to traverse a link is from the + * bi-directional; the legal way to traverse a link is from the * source router of the link to the sink router. * */ @@ -269,6 +269,21 @@ class NocStorage { */ const NocLink& get_single_noc_link(NocLinkId id) const; + /** + * @brief Given source and sink router identifiers, this function + * finds a link connecting these routers and returns its identifier. + * If such a link does not exist, an invalid id is returned. + * The function is not optimized for performance as it has a complexity + * of O(N_links). + * + * @param src_router The unique router identifier for the source router. + * @param dst_router The unique router identifier for the destination router. + * @return A link identifier (NocLinkId) that connects the source router + * to the destination router. NocLinkId::INVALID() is such a link is not + * found. + */ + NocLinkId get_single_noc_link_id(NocRouterId src_router, NocRouterId dst_router) const; + /** * @brief Given a unique link identifier, get the corresponding link * within the NoC. The link can be modified, so the intended use diff --git a/vpr/src/place/initial_noc_placement.cpp b/vpr/src/place/initial_noc_placement.cpp index 55d3c6296d1..9294f3b291b 100644 --- a/vpr/src/place/initial_noc_placement.cpp +++ b/vpr/src/place/initial_noc_placement.cpp @@ -3,6 +3,7 @@ #include "initial_placement.h" #include "noc_place_utils.h" #include "noc_place_checkpoint.h" +#include "vtr_math.h" /** * @brief Evaluates whether a NoC router swap should be accepted or not. @@ -32,7 +33,8 @@ static void place_constrained_noc_router(ClusterBlockId router_blk_id); * NoC routers. * @param seed Used for shuffling NoC routers. */ -static void place_noc_routers_randomly(std::vector& unfixed_routers, int seed); +static void place_noc_routers_randomly(std::vector& unfixed_routers, + int seed); /** * @brief Runs a simulated annealing optimizer for NoC routers. 
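The new NocStorage::get_single_noc_link_id() lookup pairs naturally with the congestion accessors added to NocLink earlier in this patch. Below is a minimal usage sketch, not part of the patch itself; `noc_model`, `src_router_id`, and `dst_router_id` are assumed to be supplied by the caller.

    // Illustrative only: query the link between two routers and report its congestion.
    NocLinkId link_id = noc_model.get_single_noc_link_id(src_router_id, dst_router_id);
    if (link_id != NocLinkId::INVALID()) {
        const NocLink& link = noc_model.get_single_noc_link(link_id);
        // Over-use is max(bandwidth_usage - bandwidth, 0.0); the ratio divides that by the link capacity.
        double over_use_bps = link.get_congested_bandwidth();
        double congestion_ratio = link.get_congested_bandwidth_ratio();
        VTR_LOG("Congested bandwidth: %f bps (ratio %f)\n", over_use_bps, congestion_ratio);
    }

As the header comment notes, the lookup is O(N_links), so it is intended for occasional queries (e.g., reporting or checks), not for inner placement loops.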
@@ -158,10 +160,11 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) { t_placer_costs costs; // Initialize NoC-related costs - costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost(); - costs.noc_latency_cost = comp_noc_latency_cost(noc_opts); + costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost(); + std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = comp_noc_latency_cost(); + costs.noc_cost_terms.congestion = comp_noc_congestion_cost(); update_noc_normalization_factors(costs); - costs.cost = calculate_noc_cost(costs, noc_opts); + costs.cost = calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts); // Maximum distance in each direction that a router can travel in a move // It is assumed that NoC routers are organized in a square grid. @@ -178,11 +181,13 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) { // the constant factor above 35000. // Get all the router clusters and figure out how many of them exist const int num_router_clusters = noc_ctx.noc_traffic_flows_storage.get_router_clusters_in_netlist().size(); - const int N_MOVES = num_router_clusters * 35000; + const int N_MOVES_PER_ROUTER = 35000; + const int N_MOVES = num_router_clusters * N_MOVES_PER_ROUTER; const double starting_prob = 0.5; const double prob_step = starting_prob / N_MOVES; + // The checkpoint stored the placement with the lowest cost. NoCPlacementCheckpoint checkpoint; @@ -211,10 +216,9 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) { if (create_move_outcome != e_create_move::ABORT) { apply_move_blocks(blocks_affected); - double noc_aggregate_bandwidth_delta_c = 0.0; - double noc_latency_delta_c = 0.0; - find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_aggregate_bandwidth_delta_c, noc_latency_delta_c, noc_opts); - double delta_cost = (noc_opts.noc_placement_weighting) * (noc_latency_delta_c * costs.noc_latency_cost_norm + noc_aggregate_bandwidth_delta_c * costs.noc_aggregate_bandwidth_cost_norm); + NocCostTerms noc_delta_c; + find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c); + double delta_cost = calculate_noc_cost(noc_delta_c, costs.noc_cost_norm_factors, noc_opts); double prob = starting_prob - i_move * prob_step; bool move_accepted = accept_noc_swap(delta_cost, prob); @@ -223,8 +227,8 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) { costs.cost += delta_cost; commit_move_blocks(blocks_affected); commit_noc_costs(); - costs.noc_aggregate_bandwidth_cost += noc_aggregate_bandwidth_delta_c; - costs.noc_latency_cost += noc_latency_delta_c; + costs += noc_delta_c; + // check if the current placement is better than the stored checkpoint if (costs.cost < checkpoint.get_cost() || !checkpoint.is_valid()) { checkpoint.save_checkpoint(costs.cost); } @@ -236,7 +240,7 @@ static void noc_routers_anneal(const t_noc_opts& noc_opts) { } if (checkpoint.get_cost() < costs.cost) { - checkpoint.restore_checkpoint(noc_opts, costs); + checkpoint.restore_checkpoint(costs); } } diff --git a/vpr/src/place/noc_place_checkpoint.cpp b/vpr/src/place/noc_place_checkpoint.cpp index a25cd9ec82c..e0f41dc94f0 100644 --- a/vpr/src/place/noc_place_checkpoint.cpp +++ b/vpr/src/place/noc_place_checkpoint.cpp @@ -32,7 +32,7 @@ void NoCPlacementCheckpoint::save_checkpoint(double cost) { cost_ = cost; } -void NoCPlacementCheckpoint::restore_checkpoint(const t_noc_opts& noc_opts, t_placer_costs& costs) { +void 
NoCPlacementCheckpoint::restore_checkpoint(t_placer_costs& costs) { const auto& noc_ctx = g_vpr_ctx.noc(); const auto& device_ctx = g_vpr_ctx.device(); auto& place_ctx = g_vpr_ctx.mutable_placement(); @@ -68,7 +68,7 @@ void NoCPlacementCheckpoint::restore_checkpoint(const t_noc_opts& noc_opts, t_pl } // Re-initialize routes and static variables that keep track of NoC-related costs - reinitialize_noc_routing(noc_opts, costs); + reinitialize_noc_routing(costs); } bool NoCPlacementCheckpoint::is_valid() const { diff --git a/vpr/src/place/noc_place_checkpoint.h b/vpr/src/place/noc_place_checkpoint.h index bf5c4305616..11df0a50732 100644 --- a/vpr/src/place/noc_place_checkpoint.h +++ b/vpr/src/place/noc_place_checkpoint.h @@ -43,10 +43,9 @@ class NoCPlacementCheckpoint { /** * @brief Loads the save checkpoint into global placement data structues. * - * @param noc_opts: Contains weighting factors for different NoC cost terms * @param costs: Used to load NoC related costs for the checkpoint */ - void restore_checkpoint(const t_noc_opts& noc_opts, t_placer_costs& costs); + void restore_checkpoint(t_placer_costs& costs); /** * @brief Indicates whether the object is empty or it has already stored a diff --git a/vpr/src/place/noc_place_utils.cpp b/vpr/src/place/noc_place_utils.cpp index 24745755123..a228cd1836e 100644 --- a/vpr/src/place/noc_place_utils.cpp +++ b/vpr/src/place/noc_place_utils.cpp @@ -7,6 +7,12 @@ static vtr::vector traffic_flow_costs, p /* Keeps track of traffic flows that have been updated at each attempted placement move*/ static std::vector affected_traffic_flows; + +/* Proposed and actual congestion cost of a NoC link used for each move assessment */ +static vtr::vector link_congestion_costs, proposed_link_congestion_costs; + +/* Keeps track of NoC links whose bandwidth usage have been updated at each attempted placement move*/ +static std::unordered_set affected_noc_links; /*********************************************************** *****************************/ /** @@ -19,28 +25,39 @@ static std::vector affected_traffic_flows; * False if there are no NoC routers in the netlist or the * selected NoC router is fixed/ */ -static bool select_random_router_cluster(ClusterBlockId& b_from, t_pl_loc& from, t_logical_block_type_ptr& cluster_from_type); +static bool select_random_router_cluster(ClusterBlockId& b_from, + t_pl_loc& from, + t_logical_block_type_ptr& cluster_from_type); -void initial_noc_routing(void) { - // need to get placement information about where the router cluster blocks are placed on the device - const auto& place_ctx = g_vpr_ctx.placement(); +/** + * @brief Given two traffic flow routes, finds links that appear + * only in one route. + * + * @param prev_links Previous route before re-routing the traffic flow + * @param curr_links Current route after re-routing the traffic flow + * + * @return Unique links that appear only in one of the given routes + */ +static std::vector find_affected_links_by_flow_reroute(std::vector& prev_links, + std::vector& curr_links); +void initial_noc_routing(void) { // need to update the link usages within after routing all the traffic flows // also need to route all the traffic flows and store them auto& noc_ctx = g_vpr_ctx.mutable_noc(); - NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage; + NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; /* We need all the traffic flow ids to be able to access them. 
The range * of traffic flow ids go from 0 to the total number of traffic flows within * the NoC. * go through all the traffic flows and route them. Then once routed, update the links used in the routed traffic flows with their usages */ - for (const auto& traffic_flow_id : noc_traffic_flows_storage->get_all_traffic_flow_id()) { - const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage->get_single_noc_traffic_flow(traffic_flow_id); + for (const auto& traffic_flow_id : noc_traffic_flows_storage.get_all_traffic_flow_id()) { + const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); // update the traffic flow route based on where the router cluster blocks are placed - std::vector& curr_traffic_flow_route = get_traffic_flow_route(traffic_flow_id, noc_ctx.noc_model, *noc_traffic_flows_storage, *noc_ctx.noc_flows_router, place_ctx.block_locs); + std::vector& curr_traffic_flow_route = route_traffic_flow(traffic_flow_id, noc_ctx.noc_model,noc_traffic_flows_storage, *noc_ctx.noc_flows_router); // update the links used in the found traffic flow route, links' bandwidth should be incremented since the traffic flow is routed update_traffic_flow_link_usage(curr_traffic_flow_route, noc_ctx.noc_model, 1, curr_traffic_flow.traffic_flow_bandwidth); @@ -49,7 +66,8 @@ void initial_noc_routing(void) { return; } -void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs) { +void reinitialize_noc_routing(t_placer_costs& costs) { + // used to access NoC links and modify them auto& noc_ctx = g_vpr_ctx.mutable_noc(); // Zero out bandwidth usage for all links @@ -61,64 +79,111 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs) initial_noc_routing(); // Initialize traffic_flow_costs - costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost(); - costs.noc_latency_cost = comp_noc_latency_cost(noc_opts); + costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost(); + std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = comp_noc_latency_cost(); + costs.noc_cost_terms.congestion = comp_noc_congestion_cost(); } -void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, double& noc_aggregate_bandwidth_delta_c, double& noc_latency_delta_c, const t_noc_opts& noc_opts) { - // provides the positions where the affected blocks have moved to - auto& place_ctx = g_vpr_ctx.placement(); +void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, + NocCostTerms& delta_c) { + /* For speed, delta_c is passed by reference instead of being returned. + * We expect delta cost terms to be zero to ensure correctness. + */ + VTR_ASSERT_SAFE(delta_c.aggregate_bandwidth == 0.); + VTR_ASSERT_SAFE(delta_c.latency == 0.); + VTR_ASSERT_SAFE(delta_c.latency_overrun == 0.); + VTR_ASSERT_SAFE(delta_c.congestion == 0.); auto& noc_ctx = g_vpr_ctx.mutable_noc(); - NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage; + NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; // keeps track of traffic flows that have been re-routed // This is useful for cases where two moved routers were part of the same traffic flow and prevents us from re-routing the same flow twice. 
std::unordered_set updated_traffic_flows; affected_traffic_flows.clear(); + affected_noc_links.clear(); // go through the moved blocks and process them only if they are NoC routers for (int iblk = 0; iblk < blocks_affected.num_moved_blocks; ++iblk) { ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num; // check if the current moved block is a noc router - if (noc_traffic_flows_storage->check_if_cluster_block_has_traffic_flows(blk)) { + if (noc_traffic_flows_storage.check_if_cluster_block_has_traffic_flows(blk)) { // current block is a router, so re-route all the traffic flows it is a part of - re_route_associated_traffic_flows(blk, *noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, place_ctx.block_locs, updated_traffic_flows); + re_route_associated_traffic_flows(blk, noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, updated_traffic_flows); } } // go through all the affected traffic flows and calculate their new costs after being re-routed, then determine the change in cost before the traffic flows were modified for (auto& traffic_flow_id : affected_traffic_flows) { // get the traffic flow route - const std::vector& traffic_flow_route = noc_traffic_flows_storage->get_traffic_flow_route(traffic_flow_id); + const std::vector& traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); // get the current traffic flow info - const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage->get_single_noc_traffic_flow(traffic_flow_id); + const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); + // calculate the new aggregate bandwidth and latency costs for the affected traffic flow proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = calculate_traffic_flow_aggregate_bandwidth_cost(traffic_flow_route, curr_traffic_flow); - proposed_traffic_flow_costs[traffic_flow_id].latency = calculate_traffic_flow_latency_cost(traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow, noc_opts); + std::tie(proposed_traffic_flow_costs[traffic_flow_id].latency, + proposed_traffic_flow_costs[traffic_flow_id].latency_overrun) = calculate_traffic_flow_latency_cost(traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow); + + // compute how much the aggregate bandwidth and latency costs change with this swap + delta_c.aggregate_bandwidth += proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth - traffic_flow_costs[traffic_flow_id].aggregate_bandwidth; + delta_c.latency += proposed_traffic_flow_costs[traffic_flow_id].latency - traffic_flow_costs[traffic_flow_id].latency; + delta_c.latency_overrun += proposed_traffic_flow_costs[traffic_flow_id].latency_overrun - traffic_flow_costs[traffic_flow_id].latency_overrun; + } + + // Iterate over all affected links and calculate their new congestion cost and store it + for (const auto& link_id : affected_noc_links) { + // get the affected link + const auto& link = noc_ctx.noc_model.get_single_noc_link(link_id); + + // calculate the new congestion cost for the link and store it + proposed_link_congestion_costs[link] = calculate_link_congestion_cost(link); - noc_aggregate_bandwidth_delta_c += proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth - traffic_flow_costs[traffic_flow_id].aggregate_bandwidth; - noc_latency_delta_c += proposed_traffic_flow_costs[traffic_flow_id].latency - traffic_flow_costs[traffic_flow_id].latency; + // compute how much the congestion cost changes with this swap 
+ delta_c.congestion += proposed_link_congestion_costs[link] - link_congestion_costs[link]; } } void commit_noc_costs() { + // used to access NoC links + auto& noc_ctx = g_vpr_ctx.mutable_noc(); + + // Iterate over all the traffic flows affected by the proposed router swap for (auto& traffic_flow_id : affected_traffic_flows) { // update the traffic flow costs traffic_flow_costs[traffic_flow_id] = proposed_traffic_flow_costs[traffic_flow_id]; // reset the proposed traffic flows costs - proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = -1; - proposed_traffic_flow_costs[traffic_flow_id].latency = -1; + proposed_traffic_flow_costs[traffic_flow_id].aggregate_bandwidth = INVALID_NOC_COST_TERM; + proposed_traffic_flow_costs[traffic_flow_id].latency = INVALID_NOC_COST_TERM; + proposed_traffic_flow_costs[traffic_flow_id].latency_overrun = INVALID_NOC_COST_TERM; + } + + // Iterate over all the NoC links whose bandwidth utilization was affected by the proposed move + for(auto link_id : affected_noc_links) { + // get the affected link + const auto& link = noc_ctx.noc_model.get_single_noc_link(link_id); + + // commit the new link congestion cost + link_congestion_costs[link] = proposed_link_congestion_costs[link]; + + // invalidate the proposed link congestion flow costs + proposed_link_congestion_costs[link] = INVALID_NOC_COST_TERM; } return; } -std::vector& get_traffic_flow_route(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, NocRouting& noc_flows_router, const vtr::vector_map& placed_cluster_block_locations) { +std::vector& route_traffic_flow(NocTrafficFlowId traffic_flow_id, + const NocStorage& noc_model, + NocTrafficFlows& noc_traffic_flows_storage, + NocRouting& noc_flows_router) { + // provides the positions where the affected blocks have moved to + auto& place_ctx = g_vpr_ctx.placement(); + // get the traffic flow with the current id const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); @@ -127,8 +192,8 @@ std::vector& get_traffic_flow_route(NocTrafficFlowId traffic_flow_id, ClusterBlockId logical_sink_router_block_id = curr_traffic_flow.sink_router_cluster_id; // get the ids of the hard router blocks where the logical router cluster blocks have been placed - NocRouterId source_router_block_id = noc_model.get_router_at_grid_location(placed_cluster_block_locations[logical_source_router_block_id].loc); - NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(placed_cluster_block_locations[logical_sink_router_block_id].loc); + NocRouterId source_router_block_id = noc_model.get_router_at_grid_location(place_ctx.block_locs[logical_source_router_block_id].loc); + NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(place_ctx.block_locs[logical_sink_router_block_id].loc); // route the current traffic flow std::vector& curr_traffic_flow_route = noc_traffic_flows_storage.get_mutable_traffic_flow_route(traffic_flow_id); @@ -153,7 +218,11 @@ void update_traffic_flow_link_usage(const std::vector& traffic_flow_r return; } -void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, const vtr::vector_map& placed_cluster_block_locations, std::unordered_set& updated_traffic_flows) { +void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, + NocTrafficFlows& noc_traffic_flows_storage, + NocStorage& 
noc_model, + NocRouting& noc_flows_router, + std::unordered_set& updated_traffic_flows) { // get all the associated traffic flows for the logical router cluster block const std::vector* assoc_traffic_flows = noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(moved_block_router_id); @@ -163,12 +232,26 @@ void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, Noc for (auto& traffic_flow_id : *assoc_traffic_flows) { // first check to see whether we have already re-routed the current traffic flow and only re-route it if we haven't already. if (updated_traffic_flows.find(traffic_flow_id) == updated_traffic_flows.end()) { + // get all links for this flow route before it is rerouted + // The returned const std::vector& is copied so that we can modify (sort) it in find_affected_links_by_flow_reroute() + std::vector prev_traffic_flow_links = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); + // now update the current traffic flow by re-routing it based on the new locations of its src and destination routers - re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_model, noc_flows_router, placed_cluster_block_locations); + re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_model, noc_flows_router); // now make sure we don't update this traffic flow a second time by adding it to the group of updated traffic flows updated_traffic_flows.insert(traffic_flow_id); + // get all links for this flow route after it is rerouted + std::vector curr_traffic_flow_links = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); + + // find links that appear in the old route or the new one, but not both of them + // these are the links whose bandwidth utilization is affected by rerouting + auto unique_links = find_affected_links_by_flow_reroute(prev_traffic_flow_links, curr_traffic_flow_links); + + // update the static data structure to remember which links were affected by router swap + affected_noc_links.insert(unique_links.begin(), unique_links.end()); + // update global datastructures to indicate that the current traffic flow was affected due to router cluster blocks being swapped affected_traffic_flows.push_back(traffic_flow_id); } @@ -179,11 +262,9 @@ void re_route_associated_traffic_flows(ClusterBlockId moved_block_router_id, Noc } void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affected) { - // provides the positions where the affected blocks have moved to - auto& place_ctx = g_vpr_ctx.placement(); auto& noc_ctx = g_vpr_ctx.mutable_noc(); - NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage; + NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; // keeps track of traffic flows that have been reverted // This is useful for cases where two moved routers were part of the same traffic flow and prevents us from re-routing the same flow twice. 
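The helper find_affected_links_by_flow_reroute() used above is only declared in this excerpt; its definition is not shown. A plausible sketch consistent with how it is called (the previous route is deliberately taken by copy so both vectors can be sorted) is a symmetric difference over the two sorted routes, assuming NocLinkId, a strong id type, supports ordering:

    #include <algorithm> // std::sort, std::set_symmetric_difference
    #include <iterator>  // std::back_inserter
    #include <vector>

    // Sketch only, not the patch's actual definition.
    static std::vector<NocLinkId> find_affected_links_by_flow_reroute(std::vector<NocLinkId>& prev_links,
                                                                      std::vector<NocLinkId>& curr_links) {
        // Sort both routes so the symmetric difference can be computed in a single pass.
        std::sort(prev_links.begin(), prev_links.end());
        std::sort(curr_links.begin(), curr_links.end());

        // Links appearing in exactly one of the two routes are the ones whose
        // bandwidth utilization (and therefore congestion cost) changed.
        std::vector<NocLinkId> unique_links;
        std::set_symmetric_difference(prev_links.begin(), prev_links.end(),
                                      curr_links.begin(), curr_links.end(),
                                      std::back_inserter(unique_links));
        return unique_links;
    }

Links present in both the old and the new route keep the same bandwidth usage, so only the symmetric-difference set needs its congestion cost re-evaluated after a swap.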
@@ -194,20 +275,20 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect ClusterBlockId blk = blocks_affected.moved_blocks[iblk].block_num; // check if the current moved block is a noc router - if (noc_traffic_flows_storage->check_if_cluster_block_has_traffic_flows(blk)) { + if (noc_traffic_flows_storage.check_if_cluster_block_has_traffic_flows(blk)) { // current block is a router, so re-route all the traffic flows it is a part of // // get all the associated traffic flows for the logical router cluster block - const std::vector* assoc_traffic_flows = noc_traffic_flows_storage->get_traffic_flows_associated_to_router_block(blk); + const std::vector* assoc_traffic_flows = noc_traffic_flows_storage.get_traffic_flows_associated_to_router_block(blk); // now check if there are any associated traffic flows - if (assoc_traffic_flows->size() != 0) { + if (assoc_traffic_flows != nullptr) { // There are traffic flows associated to the current router block so process them for (auto& traffic_flow_id : *assoc_traffic_flows) { // first check to see whether we have already reverted the current traffic flow and only revert it if we haven't already. if (reverted_traffic_flows.find(traffic_flow_id) == reverted_traffic_flows.end()) { // Revert the traffic flow route by re-routing it - re_route_traffic_flow(traffic_flow_id, *noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router, place_ctx.block_locs); + re_route_traffic_flow(traffic_flow_id, noc_traffic_flows_storage, noc_ctx.noc_model, *noc_ctx.noc_flows_router); // make sure we do not revert this traffic flow again reverted_traffic_flows.insert(traffic_flow_id); @@ -220,7 +301,10 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect return; } -void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, const vtr::vector_map& placed_cluster_block_locations) { +void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, + NocTrafficFlows& noc_traffic_flows_storage, + NocStorage& noc_model, + NocRouting& noc_flows_router) { // get the current traffic flow info const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); @@ -233,21 +317,28 @@ void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& no update_traffic_flow_link_usage(curr_traffic_flow_route, noc_model, -1, curr_traffic_flow.traffic_flow_bandwidth); // now get the re-routed traffic flow route and increment all the link usages with this reverted route - std::vector& re_routed_traffic_flow_route = get_traffic_flow_route(traffic_flow_id, noc_model, noc_traffic_flows_storage, noc_flows_router, placed_cluster_block_locations); + std::vector& re_routed_traffic_flow_route = route_traffic_flow(traffic_flow_id, noc_model, noc_traffic_flows_storage, noc_flows_router); update_traffic_flow_link_usage(re_routed_traffic_flow_route, noc_model, 1, curr_traffic_flow.traffic_flow_bandwidth); return; } -void recompute_noc_costs(double& new_noc_aggregate_bandwidth_cost, double& new_noc_latency_cost) { +void recompute_noc_costs(NocCostTerms& new_cost) { + auto& noc_ctx = g_vpr_ctx.noc(); + // reset the cost variables first - new_noc_aggregate_bandwidth_cost = 0; - new_noc_latency_cost = 0; + new_cost = NocCostTerms{0.0, 0.0, 0.0, 0.0}; // go through the costs of all the traffic flows and add them up to recompute the total costs associated with the NoC - for (const 
auto& traffic_flow_id : g_vpr_ctx.noc().noc_traffic_flows_storage.get_all_traffic_flow_id()) { - new_noc_aggregate_bandwidth_cost += traffic_flow_costs[traffic_flow_id].aggregate_bandwidth; - new_noc_latency_cost += traffic_flow_costs[traffic_flow_id].latency; + for (const auto& traffic_flow_id : noc_ctx.noc_traffic_flows_storage.get_all_traffic_flow_id()) { + new_cost.aggregate_bandwidth += traffic_flow_costs[traffic_flow_id].aggregate_bandwidth; + new_cost.latency += traffic_flow_costs[traffic_flow_id].latency; + new_cost.latency_overrun += traffic_flow_costs[traffic_flow_id].latency_overrun; + } + + // Iterate over all NoC links and accumulate their congestion costs + for (auto& link_id : noc_ctx.noc_model.get_noc_links()) { + new_cost.congestion += link_congestion_costs[link_id]; } return; @@ -255,31 +346,41 @@ void recompute_noc_costs(double& new_noc_aggregate_bandwidth_cost, double& new_n void update_noc_normalization_factors(t_placer_costs& costs) { //Prevent the norm factors from going to infinity - costs.noc_aggregate_bandwidth_cost_norm = std::min(1 / costs.noc_aggregate_bandwidth_cost, MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST); - costs.noc_latency_cost_norm = std::min(1 / costs.noc_latency_cost, MAX_INV_NOC_LATENCY_COST); + costs.noc_cost_norm_factors.aggregate_bandwidth = std::min(1 / costs.noc_cost_terms.aggregate_bandwidth, MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST); + costs.noc_cost_norm_factors.latency = std::min(1 / costs.noc_cost_terms.latency, MAX_INV_NOC_LATENCY_COST); + + // to avoid division by zero and negative numbers + // latency overrun cost may take very small negative values due to round-off error + if (costs.noc_cost_terms.latency_overrun > 0.0) { + costs.noc_cost_norm_factors.latency_overrun = std::min(1 / costs.noc_cost_terms.latency_overrun, MAX_INV_NOC_LATENCY_COST); + } else { + costs.noc_cost_norm_factors.latency_overrun = MAX_INV_NOC_LATENCY_COST; + } - return; -} + // to avoid division by zero and negative numbers + // congestion cost may take very small negative values due to round-off error + if (costs.noc_cost_terms.congestion > 0.0) { + costs.noc_cost_norm_factors.congestion = std::min(1 / costs.noc_cost_terms.congestion, MAX_INV_NOC_CONGESTION_COST); + } else { + costs.noc_cost_norm_factors.congestion = MAX_INV_NOC_CONGESTION_COST; + } -double calculate_noc_cost(const t_placer_costs& costs, const t_noc_opts& noc_opts) { - double noc_cost; - noc_cost = (noc_opts.noc_placement_weighting) * ((costs.noc_aggregate_bandwidth_cost * costs.noc_aggregate_bandwidth_cost_norm) + (costs.noc_latency_cost * costs.noc_latency_cost_norm)); - return noc_cost; + return; } double comp_noc_aggregate_bandwidth_cost(void) { // used to get traffic flow route information - auto& noc_ctx = g_vpr_ctx.mutable_noc(); + auto& noc_ctx = g_vpr_ctx.noc(); // datastructure that stores all the traffic flow routes - const NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage; + const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; double noc_aggregate_bandwidth_cost = 0.; // now go through each traffic flow route and calculate its // aggregate bandwidth. Then store this in local data structures and accumulate it. 
for (const auto& traffic_flow_id : g_vpr_ctx.noc().noc_traffic_flows_storage.get_all_traffic_flow_id()) { - const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage->get_single_noc_traffic_flow(traffic_flow_id); - const std::vector& curr_traffic_flow_route = noc_traffic_flows_storage->get_traffic_flow_route(traffic_flow_id); + const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); + const std::vector& curr_traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); double curr_traffic_flow_aggregate_bandwidth_cost = calculate_traffic_flow_aggregate_bandwidth_cost(curr_traffic_flow_route, curr_traffic_flow); @@ -293,99 +394,159 @@ double comp_noc_aggregate_bandwidth_cost(void) { return noc_aggregate_bandwidth_cost; } -double comp_noc_latency_cost(const t_noc_opts& noc_opts) { +std::pair comp_noc_latency_cost() { // used to get traffic flow route information - auto& noc_ctx = g_vpr_ctx.mutable_noc(); + auto& noc_ctx = g_vpr_ctx.noc(); // datastructure that stores all the traffic flow routes - const NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage; + const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; - double noc_latency_cost = 0.; + std::pair noc_latency_cost_terms{0.0, 0.0}; // now go through each traffic flow route and calculate its // latency. Then store this in local data structures and accumulate it. for (const auto& traffic_flow_id : noc_ctx.noc_traffic_flows_storage.get_all_traffic_flow_id()) { - const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage->get_single_noc_traffic_flow(traffic_flow_id); - const std::vector& curr_traffic_flow_route = noc_traffic_flows_storage->get_traffic_flow_route(traffic_flow_id); + const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); + const std::vector& curr_traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); - double curr_traffic_flow_latency_cost = calculate_traffic_flow_latency_cost(curr_traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow, noc_opts); + auto [curr_traffic_flow_latency, curr_traffic_flow_latency_overrun] = calculate_traffic_flow_latency_cost(curr_traffic_flow_route, noc_ctx.noc_model, curr_traffic_flow); - // store the calculated latency for the current traffic flow in local datastructures (this also initializes them) - traffic_flow_costs[traffic_flow_id].latency = curr_traffic_flow_latency_cost; + // store the calculated latency cost terms for the current traffic flow in local datastructures (this also initializes them) + traffic_flow_costs[traffic_flow_id].latency = curr_traffic_flow_latency; + traffic_flow_costs[traffic_flow_id].latency_overrun = curr_traffic_flow_latency_overrun; - // accumulate the aggregate bandwidth cost - noc_latency_cost += curr_traffic_flow_latency_cost; + // accumulate the latency cost terms + noc_latency_cost_terms.first += curr_traffic_flow_latency; + noc_latency_cost_terms.second += curr_traffic_flow_latency_overrun; + } + + return noc_latency_cost_terms; +} + +double comp_noc_congestion_cost() { + // Used to access NoC links + auto& noc_ctx = g_vpr_ctx.noc(); + + double congestion_cost = 0.; + + // Iterate over all NoC links + for (const auto& link : noc_ctx.noc_model.get_noc_links()) { + double link_congestion_cost = calculate_link_congestion_cost(link); + + // store the congestion cost for this link in static data 
structures (this also initializes them) + link_congestion_costs[link] = link_congestion_cost; + + // accumulate the congestion cost + congestion_cost += link_congestion_cost; } - return noc_latency_cost; + return congestion_cost; } int check_noc_placement_costs(const t_placer_costs& costs, double error_tolerance, const t_noc_opts& noc_opts) { int error = 0; - double noc_aggregate_bandwidth_cost_check = 0.; - double noc_latency_cost_check = 0.; + NocCostTerms cost_check{0.0, 0.0, 0.0, 0.0}; // get current router block locations auto& place_ctx = g_vpr_ctx.placement(); - const vtr::vector_map* placed_cluster_block_locations = &place_ctx.block_locs; + const vtr::vector_map& placed_cluster_block_locations = place_ctx.block_locs; auto& noc_ctx = g_vpr_ctx.noc(); - const NocStorage* noc_model = &noc_ctx.noc_model; - const NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage; + const NocStorage& noc_model = noc_ctx.noc_model; + const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; + + // a copy of NoC link storage used to calculate link bandwidth utilization from scratch + vtr::vector temp_noc_link_storage = noc_model.get_noc_links(); + + // reset bandwidth utilization for all links + std::for_each(temp_noc_link_storage.begin(), temp_noc_link_storage.end(), [](NocLink& link) {link.set_bandwidth_usage(0.0); }); // need to create a temporary noc routing algorithm - NocRoutingAlgorithmCreator routing_algorithm_factory; - NocRouting* temp_noc_routing_algorithm = routing_algorithm_factory.create_routing_algorithm(noc_opts.noc_routing_algorithm); + std::unique_ptr temp_noc_routing_algorithm = NocRoutingAlgorithmCreator::create_routing_algorithm(noc_opts.noc_routing_algorithm); // stores a temporarily found route for a traffic flow std::vector temp_found_noc_route; // go through all the traffic flows and find a route for them based on where the routers are placed within the NoC - for (const auto& traffic_flow_id : noc_traffic_flows_storage->get_all_traffic_flow_id()) { + for (const auto& traffic_flow_id : noc_traffic_flows_storage.get_all_traffic_flow_id()) { // get the traffic flow with the current id - const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage->get_single_noc_traffic_flow(traffic_flow_id); + const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); // get the source and destination logical router blocks in the current traffic flow ClusterBlockId logical_source_router_block_id = curr_traffic_flow.source_router_cluster_id; ClusterBlockId logical_sink_router_block_id = curr_traffic_flow.sink_router_cluster_id; // get the ids of the hard router blocks where the logical router cluster blocks have been placed - NocRouterId source_router_block_id = noc_model->get_router_at_grid_location((*placed_cluster_block_locations)[logical_source_router_block_id].loc); - NocRouterId sink_router_block_id = noc_model->get_router_at_grid_location((*placed_cluster_block_locations)[logical_sink_router_block_id].loc); + NocRouterId source_router_block_id = noc_model.get_router_at_grid_location(placed_cluster_block_locations[logical_source_router_block_id].loc); + NocRouterId sink_router_block_id = noc_model.get_router_at_grid_location(placed_cluster_block_locations[logical_sink_router_block_id].loc); // route the current traffic flow - temp_noc_routing_algorithm->route_flow(source_router_block_id, sink_router_block_id, temp_found_noc_route, *noc_model); + 
temp_noc_routing_algorithm->route_flow(source_router_block_id, sink_router_block_id, temp_found_noc_route, noc_model); // now calculate the costs associated to the current traffic flow and accumulate it to find the total cost of the NoC placement double current_flow_aggregate_bandwidth_cost = calculate_traffic_flow_aggregate_bandwidth_cost(temp_found_noc_route, curr_traffic_flow); - noc_aggregate_bandwidth_cost_check += current_flow_aggregate_bandwidth_cost; - - double current_flow_latency_cost = calculate_traffic_flow_latency_cost(temp_found_noc_route, *noc_model, curr_traffic_flow, noc_opts); - noc_latency_cost_check += current_flow_latency_cost; + cost_check.aggregate_bandwidth += current_flow_aggregate_bandwidth_cost; + + auto [curr_traffic_flow_latency_cost, curr_traffic_flow_latency_overrun_cost] = calculate_traffic_flow_latency_cost(temp_found_noc_route, noc_model, curr_traffic_flow); + cost_check.latency += curr_traffic_flow_latency_cost; + cost_check.latency_overrun += curr_traffic_flow_latency_overrun_cost; + + // increase bandwidth utilization for the links that constitute the current flow's route + for (auto& link_id : temp_found_noc_route) { + auto& link = temp_noc_link_storage[link_id]; + double curr_link_bw_util = link.get_bandwidth_usage(); + link.set_bandwidth_usage(curr_link_bw_util + curr_traffic_flow.traffic_flow_bandwidth); + VTR_ASSERT(link.get_bandwidth_usage() >= 0.0); + } // clear the current traffic flow route, so we can route the next traffic flow temp_found_noc_route.clear(); } + // Iterate over all NoC links and accumulate congestion cost + for(const auto& link : temp_noc_link_storage) { + cost_check.congestion += calculate_link_congestion_cost(link); + } + // check whether the aggregate bandwidth placement cost is within the error tolerance - if (fabs(noc_aggregate_bandwidth_cost_check - costs.noc_aggregate_bandwidth_cost) > costs.noc_aggregate_bandwidth_cost * error_tolerance) { + if (fabs(cost_check.aggregate_bandwidth - costs.noc_cost_terms.aggregate_bandwidth) > costs.noc_cost_terms.aggregate_bandwidth * error_tolerance) { VTR_LOG_ERROR( "noc_aggregate_bandwidth_cost_check: %g and noc_aggregate_bandwidth_cost: %g differ in check_noc_placement_costs.\n", - noc_aggregate_bandwidth_cost_check, costs.noc_aggregate_bandwidth_cost); + cost_check.aggregate_bandwidth, costs.noc_cost_terms.aggregate_bandwidth); error++; } // only check the recomputed cost if it is above our expected latency cost threshold of 1 pico-second, otherwise there is no point in checking it - if (noc_latency_cost_check > MIN_EXPECTED_NOC_LATENCY_COST) { + if (cost_check.latency > MIN_EXPECTED_NOC_LATENCY_COST) { // check whether the latency placement cost is within the error tolerance - if (fabs(noc_latency_cost_check - costs.noc_latency_cost) > costs.noc_latency_cost * error_tolerance) { + if (fabs(cost_check.latency - costs.noc_cost_terms.latency) > costs.noc_cost_terms.latency * error_tolerance) { VTR_LOG_ERROR( "noc_latency_cost_check: %g and noc_latency_cost: %g differ in check_noc_placement_costs.\n", - noc_latency_cost_check, costs.noc_latency_cost); + cost_check.latency, costs.noc_cost_terms.latency); + error++; + } + } + + // only check the recomputed cost if it is above our expected latency cost threshold of 1 pico-second, otherwise there is no point in checking it + if (cost_check.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST) { + // check whether the latency overrun placement cost is within the error tolerance + if (fabs(cost_check.latency_overrun - 
costs.noc_cost_terms.latency_overrun) > costs.noc_cost_terms.latency_overrun * error_tolerance) { + VTR_LOG_ERROR( + "noc_latency_overrun_cost_check: %g and noc_latency_overrun_cost: %g differ in check_noc_placement_costs.\n", + cost_check.latency_overrun, costs.noc_cost_terms.latency_overrun); + error++; + } + } + + // check the recomputed congestion cost only if it is higher than the minimum expected value + if (cost_check.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) { + // check whether the NoC congestion cost is within the error range + if (fabs(cost_check.congestion - costs.noc_cost_terms.congestion) > costs.noc_cost_terms.congestion * error_tolerance) { + VTR_LOG_ERROR( + "noc_congestion_cost_check: %g and noc_congestion_cost: %g differ in check_noc_placement_costs.\n", + cost_check.congestion, costs.noc_cost_terms.congestion); error++; } } - // delete the temporary routing algorithm - delete temp_noc_routing_algorithm; return error; } @@ -397,7 +558,9 @@ double calculate_traffic_flow_aggregate_bandwidth_cost(const std::vector& traffic_flow_route, const NocStorage& noc_model, const t_noc_traffic_flow& traffic_flow_info, const t_noc_opts& noc_opts) { +std::pair calculate_traffic_flow_latency_cost(const std::vector& traffic_flow_route, + const NocStorage& noc_model, + const t_noc_traffic_flow& traffic_flow_info) { // there will always be one more router than links in a traffic flow int num_of_links_in_traffic_flow = traffic_flow_route.size(); int num_of_routers_in_traffic_flow = num_of_links_in_traffic_flow + 1; @@ -407,28 +570,74 @@ double calculate_traffic_flow_latency_cost(const std::vector& traffic double noc_link_latency = noc_model.get_noc_link_latency(); double noc_router_latency = noc_model.get_noc_router_latency(); - // calculate the traffic flow_latency + // calculate the traffic flow latency double latency = (noc_link_latency * num_of_links_in_traffic_flow) + (noc_router_latency * num_of_routers_in_traffic_flow); - // calculate the cost - double single_traffic_flow_latency_cost = (noc_opts.noc_latency_constraints_weighting * std::max(0., latency - max_latency)) + (noc_opts.noc_latency_weighting * latency); + // calculate the traffic flow latency overrun + double latency_overrun = std::max(latency - max_latency, 0.); // scale the latency cost by its priority to indicate its importance - return (single_traffic_flow_latency_cost * traffic_flow_info.traffic_flow_priority); + latency *= traffic_flow_info.traffic_flow_priority; + latency_overrun *= traffic_flow_info.traffic_flow_priority; + + return {latency, latency_overrun}; +} + +double calculate_link_congestion_cost(const NocLink& link) { + double congested_bw_ratio = link.get_congested_bandwidth_ratio(); + + return congested_bw_ratio; +} + +void normalize_noc_cost_weighting_factor(t_noc_opts& noc_opts) { + + double weighting_factor_sum = noc_opts.noc_latency_weighting + + noc_opts.noc_latency_constraints_weighting + + noc_opts.noc_congestion_weighting; + + VTR_ASSERT(weighting_factor_sum <= 1.0 && weighting_factor_sum >= 0.0); + + noc_opts.noc_aggregate_bandwidth_weighting = 1.0 - weighting_factor_sum; +} + +double calculate_noc_cost(const NocCostTerms& cost_terms, + const NocCostTerms& norm_factors, + const t_noc_opts& noc_opts) { + double cost = 0.0; + + /* NoC's contribution to the placement cost is a weighted sum over: + * 1) Traffic flow aggregate bandwidth cost + * 2) Traffic flow latency cost + * 3) Traffic flow latency overrun cost + * 4) Link congestion cost + * + * Since NoC-related cost terms have different 
scales, they are + * rescaled by multiplying each cost term with its corresponding + * normalization factor. Then, a weighted sum over normalized cost terms + * is computed. Weighting factors determine the contribution of each + * normalized term to the sum. + */ + cost = noc_opts.noc_placement_weighting * ( + cost_terms.aggregate_bandwidth * norm_factors.aggregate_bandwidth * noc_opts.noc_aggregate_bandwidth_weighting + + cost_terms.latency * norm_factors.latency * noc_opts.noc_latency_weighting + + cost_terms.latency_overrun * norm_factors.latency_overrun * noc_opts.noc_latency_constraints_weighting + + cost_terms.congestion * norm_factors.congestion * noc_opts.noc_congestion_weighting); + + return cost; } int get_number_of_traffic_flows_with_latency_cons_met(void) { // used to get traffic flow route information auto& noc_ctx = g_vpr_ctx.mutable_noc(); // datastructure that stores all the traffic flow routes - const NocTrafficFlows* noc_traffic_flows_storage = &noc_ctx.noc_traffic_flows_storage; + const NocTrafficFlows& noc_traffic_flows_storage = noc_ctx.noc_traffic_flows_storage; int count_of_achieved_latency_cons = 0; // now go through each traffic flow route and check if its latency constraint was met - for (const auto& traffic_flow_id : noc_traffic_flows_storage->get_all_traffic_flow_id()) { - const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage->get_single_noc_traffic_flow(traffic_flow_id); - const std::vector& curr_traffic_flow_route = noc_traffic_flows_storage->get_traffic_flow_route(traffic_flow_id); + for (const auto& traffic_flow_id : noc_traffic_flows_storage.get_all_traffic_flow_id()) { + const t_noc_traffic_flow& curr_traffic_flow = noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); + const std::vector& curr_traffic_flow_route = noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id); // there will always be one more router than links in a traffic flow int num_of_links_in_traffic_flow = curr_traffic_flow_route.size(); @@ -451,13 +660,67 @@ int get_number_of_traffic_flows_with_latency_cons_met(void) { return count_of_achieved_latency_cons; } +int get_number_of_congested_noc_links(void) { + // get NoC links + auto& noc_links = g_vpr_ctx.noc().noc_model.get_noc_links(); + + int num_congested_links = 0; + + // Iterate over all NoC links and count the congested ones + for (const auto& link : noc_links) { + double congested_bw_ratio = link.get_congested_bandwidth_ratio(); + + if (congested_bw_ratio > MIN_EXPECTED_NOC_CONGESTION_COST) { + num_congested_links++; + } + } + + return num_congested_links; +} + +double get_total_congestion_bandwidth_ratio(void) { + // get NoC links + auto& noc_links = g_vpr_ctx.noc().noc_model.get_noc_links(); + + double accum_congestion_ratio = 0.0; + + // Iterate over all NoC links and count the congested ones + for (const auto& link : noc_links) { + double congested_bw_ratio = link.get_congested_bandwidth_ratio(); + accum_congestion_ratio += congested_bw_ratio; + } + + return accum_congestion_ratio; +} + +std::vector get_top_n_congested_links(int n) { + // get NoC links + vtr::vector noc_links = g_vpr_ctx.noc().noc_model.get_noc_links(); + + // Sort links based on their congested bandwidth ration in descending order + // stable_sort is used to make sure the order is the same across different machines/compilers + // Note that when the vector is sorted, indexing it with NocLinkId does return the corresponding link + std::stable_sort(noc_links.begin(), noc_links.end(), [](const NocLink& l1, const 
NocLink& l2) { + return l1.get_congested_bandwidth_ratio() > l2.get_congested_bandwidth_ratio(); + }); + + int pick_n = std::min((int)noc_links.size(), n); + + return std::vector{noc_links.begin(), noc_links.begin() + pick_n}; +} + void allocate_and_load_noc_placement_structs(void) { auto& noc_ctx = g_vpr_ctx.noc(); int number_of_traffic_flows = noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows(); - traffic_flow_costs.resize(number_of_traffic_flows); - proposed_traffic_flow_costs.resize(number_of_traffic_flows); + traffic_flow_costs.resize(number_of_traffic_flows, {INVALID_NOC_COST_TERM, INVALID_NOC_COST_TERM}); + proposed_traffic_flow_costs.resize(number_of_traffic_flows, {INVALID_NOC_COST_TERM, INVALID_NOC_COST_TERM}); + + int number_of_noc_links = noc_ctx.noc_model.get_number_of_noc_links(); + + link_congestion_costs.resize(number_of_noc_links, INVALID_NOC_COST_TERM); + proposed_link_congestion_costs.resize(number_of_noc_links, INVALID_NOC_COST_TERM); return; } @@ -467,6 +730,10 @@ void free_noc_placement_structs(void) { vtr::release_memory(proposed_traffic_flow_costs); vtr::release_memory(affected_traffic_flows); + vtr::release_memory(link_congestion_costs); + vtr::release_memory(proposed_link_congestion_costs); + vtr::release_memory(affected_noc_links); + return; } @@ -478,7 +745,7 @@ bool check_for_router_swap(int user_supplied_noc_router_swap_percentage) { * we now only swap router blocks for the percentage of time the user * supplied. * */ - return (vtr::irand(99) < user_supplied_noc_router_swap_percentage) ? true : false; + return (vtr::irand(99) < user_supplied_noc_router_swap_percentage); } static bool select_random_router_cluster(ClusterBlockId& b_from, t_pl_loc& from, t_logical_block_type_ptr& cluster_from_type) { @@ -608,4 +875,26 @@ void write_noc_placement_file(const std::string& file_name) { noc_placement_file.close(); return; +} + +static std::vector find_affected_links_by_flow_reroute(std::vector& prev_links, + std::vector& curr_links) { + // Sort both link containers + std::sort(prev_links.begin(), prev_links.end()); + std::sort(curr_links.begin(), curr_links.end()); + + // stores links that appear either in prev_links or curr_links but not both of them + std::vector unique_links; + + // find links that are unique to prev_links + std::set_difference(prev_links.begin(), prev_links.end(), + curr_links.begin(), curr_links.end(), + std::back_inserter(unique_links)); + + // find links that are unique to curr_links + std::set_difference(curr_links.begin(), curr_links.end(), + prev_links.begin(), prev_links.end(), + std::back_inserter(unique_links)); + + return unique_links; } \ No newline at end of file diff --git a/vpr/src/place/noc_place_utils.h b/vpr/src/place/noc_place_utils.h index 5dbaed43f8f..24926c48925 100644 --- a/vpr/src/place/noc_place_utils.h +++ b/vpr/src/place/noc_place_utils.h @@ -19,21 +19,31 @@ constexpr double MAX_INV_NOC_AGGREGATE_BANDWIDTH_COST = 1.; // we expect the latency costs to be in the pico-second range, and we don't expect it to go lower than that. So if the latency costs go below the pico-second range we trim the normalization value to be no higher than 1/ps // This should be updated if the delays become lower constexpr double MAX_INV_NOC_LATENCY_COST = 1.e12; - // we don't expect the noc_latency cost to ever go below 1 pico second. // So this value represents the lowest possible latency cost. 
constexpr double MIN_EXPECTED_NOC_LATENCY_COST = 1.e-12; +// the congestion cost for a link is measured as the proportion of the overloaded BW to the link capacity +// We assume that when a link congested, it is overloaded with at least 0.1% of its BW capacity +constexpr double MAX_INV_NOC_CONGESTION_COST = 1.e3; +// If a link is overloaded by less than 0.1% of the link bandwidth capacity, +// we assume it is not congested. +constexpr double MIN_EXPECTED_NOC_CONGESTION_COST = 1.e-3; + +constexpr double INVALID_NOC_COST_TERM = -1.0; /** * @brief Each traffic flow cost consists of two components: * 1) traffic flow aggregate bandwidth (sum over all used links of the traffic flow bandwidth) * 2) traffic flow latency (currently unloaded/best-case latency of the flow) + * 3) traffic flow latency overrun (how much the latency is higher than the + * latency constraint for a traffic flow. * NoC placement code will keep an array-of-struct to easily access each * traffic flow cost. */ struct TrafficFlowPlaceCost { - double aggregate_bandwidth = -1; - double latency = -1; + double aggregate_bandwidth = INVALID_NOC_COST_TERM; + double latency = INVALID_NOC_COST_TERM; + double latency_overrun = INVALID_NOC_COST_TERM; }; /** @@ -62,10 +72,9 @@ void initial_noc_routing(void); * traffic flow routes, and static variable in noc_place_utils.cpp are no * longer valid and need to be re-initialized. * - * @param noc_opts NoC-related options used to calculated NoC costs * @param costs Used to get aggregate bandwidth and latency costs. */ -void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs); +void reinitialize_noc_routing(t_placer_costs& costs); /** * @brief Goes through all the cluster blocks that were moved @@ -100,7 +109,8 @@ void reinitialize_noc_routing(const t_noc_opts& noc_opts, t_placer_costs& costs) * NoC latency cost caused by a placer move is stored * here. */ -void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, double& noc_aggregate_bandwidth_delta_c, double& noc_latency_delta_c, const t_noc_opts& noc_opts); +void find_affected_noc_routers_and_update_noc_costs(const t_pl_blocks_to_be_moved& blocks_affected, + NocCostTerms& delta_c); /** * @brief Updates static datastructures found in 'noc_place_utils.cpp' @@ -131,6 +141,12 @@ void commit_noc_costs(); * First, the hard routers blocks that represent the placed location of * the router cluster blocks are identified. Then the traffic flow * is routed and updated. + * + * Note that this function does not update the link bandwidth utilization. + * update_traffic_flow_link_usage() should be called after this function + * to update the link utilization for the new route. If the flow is re-routed + * because either its source or destination are moved, update_traffic_flow_link_usage() + * should be used to reduce the bandwidth utilization for the old route. * * @param traffic_flow_id Represents the traffic flow that needs to be routed * @param noc_model Contains all the links and routers within the NoC. Used @@ -139,11 +155,12 @@ void commit_noc_costs(); * within the NoC. Used to get the current traffic flow information. * @param noc_flows_router The packet routing algorithm used to route traffic * flows within the NoC. - * @param placed_cluster_block_locations A datastructure that identifies the - * placed grid locations of all cluster blocks. * @return std::vector& The found route for the traffic flow. 
*/ -std::vector& get_traffic_flow_route(NocTrafficFlowId traffic_flow_id, const NocStorage& noc_model, NocTrafficFlows& noc_traffic_flows_storage, NocRouting& noc_flows_router, const vtr::vector_map& placed_cluster_block_locations); +std::vector& route_traffic_flow(NocTrafficFlowId traffic_flow_id, + const NocStorage& noc_model, + NocTrafficFlows& noc_traffic_flows_storage, + NocRouting& noc_flows_router); /** * @brief Updates the bandwidth usages of links found in a routed traffic flow. @@ -188,12 +205,13 @@ void update_traffic_flow_link_usage(const std::vector& traffic_flow_r * to route traffic flows within the NoC. * @param noc_flows_router The packet routing algorithm used to route traffic * flows within the NoC. - * @param placed_cluster_block_locations A datastructure that identifies the - * placed grid locations of all cluster blocks. * @param updated_traffic_flows Keeps track of traffic flows that have been * re-routed. Used to prevent re-routing the same traffic flow multiple times. */ -void re_route_associated_traffic_flows(ClusterBlockId moved_router_block_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, const vtr::vector_map& placed_cluster_block_locations, std::unordered_set& updated_traffic_flows); +void re_route_associated_traffic_flows(ClusterBlockId moved_router_block_id, + NocTrafficFlows& noc_traffic_flows_storage, + NocStorage& noc_model, NocRouting& noc_flows_router, + std::unordered_set& updated_traffic_flows); /** * @brief Used to re-route all the traffic flows associated to logical @@ -223,10 +241,11 @@ void revert_noc_traffic_flow_routes(const t_pl_blocks_to_be_moved& blocks_affect * to route traffic flows within the NoC. * @param noc_flows_router The packet routing algorithm used to route traffic * flows within the NoC. - * @param placed_cluster_block_locations A datastructure that identifies the - * placed grid locations of all cluster blocks. */ -void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& noc_traffic_flows_storage, NocStorage& noc_model, NocRouting& noc_flows_router, const vtr::vector_map& placed_cluster_block_locations); +void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, + NocTrafficFlows& noc_traffic_flows_storage, + NocStorage& noc_model, + NocRouting& noc_flows_router); /** * @brief Recompute the NoC costs (aggregate bandwidth and latency) by @@ -256,7 +275,7 @@ void re_route_traffic_flow(NocTrafficFlowId traffic_flow_id, NocTrafficFlows& no * @param new_noc_latency_cost Will store the newly computed * NoC latency cost for the current placement state. */ -void recompute_noc_costs(double& new_noc_aggregate_bandwidth_cost, double& new_noc_latency_cost); +void recompute_noc_costs(NocCostTerms& new_cost); /** * @brief Updates all the cost normalization factors relevant to the NoC. @@ -270,17 +289,6 @@ void recompute_noc_costs(double& new_noc_aggregate_bandwidth_cost, double& new_n */ void update_noc_normalization_factors(t_placer_costs& costs); -/** - * @brief Calculates total NoC cost. - * - * @param costs Contains latency and aggregate bandwidth costs - * along with their corresponding normalization factors. - * @param noc_opts Contains NoC placement weighting factor. - * - * @return Calculated total NoC cost. 
- */ -double calculate_noc_cost(const t_placer_costs& costs, const t_noc_opts& noc_opts); - /** * @brief Calculates the aggregate bandwidth of each traffic flow in the NoC * and initializes local variables that keep track of the traffic flow * @@ -306,7 +314,9 @@ double comp_noc_aggregate_bandwidth_cost(void); * * @return double The latency cost of the NoC. */ -double comp_noc_latency_cost(const t_noc_opts& noc_opts); +std::pair comp_noc_latency_cost(); + +double comp_noc_congestion_cost(); /** * @brief Given a placement state the NoC costs are re-computed * @@ -369,11 +379,54 @@ double calculate_traffic_flow_aggregate_bandwidth_cost(const std::vector calculate_traffic_flow_latency_cost(const std::vector& traffic_flow_route, + const NocStorage& noc_model, + const t_noc_traffic_flow& traffic_flow_info); + +/** + * @brief Determines the congestion cost of a NoC link. The cost + * is calculated by measuring how much the current bandwidth + * going through the link exceeds the link's bandwidth capacity. + * + * @param link The NoC link for which the congestion cost is + * to be computed. + * @return The computed congestion cost for the given NoC link. + */ +double calculate_link_congestion_cost(const NocLink& link); + +/** + * @brief The user passes weighting factors for the latency, latency + * overrun, and congestion terms. The weighting factor for aggregate + * bandwidth is computed by subtracting the three user-provided weighting + * factors from 1. The computed aggregate bandwidth weighting factor + * is stored in the noc_opts argument. + * + * @param noc_opts Contains weighting factors. + */ +void normalize_noc_cost_weighting_factor(t_noc_opts& noc_opts); + +/** + * @brief Computes a weighted average of NoC cost terms to determine + * NoC's contribution to the total placement cost. + * + * @param cost_terms Different NoC-related cost terms. + * @param norm_factors Normalization factors used to scale + * different NoC-related cost terms so that they have similar + * ranges. + * @param noc_opts Contains the noc_placement_weighting factor + * to specify the contribution of NoC-related cost to the + * total placement cost. + * @return The computed total NoC-related contribution to the + * total placement cost. */ -double calculate_traffic_flow_latency_cost(const std::vector& traffic_flow_route, const NocStorage& noc_model, const t_noc_traffic_flow& traffic_flow_info, const t_noc_opts& noc_opts); +double calculate_noc_cost(const NocCostTerms& cost_terms, + const NocCostTerms& norm_factors, + const t_noc_opts& noc_opts); /** * @brief Goes through all the traffic flows and determines whether the * @@ -383,6 +436,41 @@ double calculate_traffic_flow_latency_cost(const std::vector& traffic */ int get_number_of_traffic_flows_with_latency_cons_met(void); +/** + * @brief Goes through all NoC links and counts the congested ones. + * A congested NoC link is a link whose used bandwidth exceeds its + * bandwidth capacity. + * + * @return The total number of congested NoC links. + */ +int get_number_of_congested_noc_links(void); + +/** + * @brief Goes through all NoC links and determines whether they + * are congested or not. Then adds up the congestion ratio of all + * congested links. + * + * @return The total congestion ratio. + */ +double get_total_congestion_bandwidth_ratio(void); + +/** + * @brief Goes through all NoC links and determines whether they + * are congested or not. Then finds n links that are most congested.
+ * + * @return n links with highest congestion ratio + */ +std::vector get_top_n_congested_links(int n); + + +/** + * @brief Goes through all NoC links and determines whether they + * are congested or not. Then finds n links that are most congested. + * + * @return n highest congestion ratios + */ +std::vector get_top_n_congestion_ratios(int n); + /** * @brief There are a number of static datastructures which are local * to 'noc_place_utils.cpp'. THe purpose of these datastructures is diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 25894fa952b..2e30d2f3c43 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -15,6 +15,7 @@ #include "vtr_random.h" #include "vtr_geometry.h" #include "vtr_time.h" +#include "vtr_math.h" #include "vpr_types.h" #include "vpr_error.h" @@ -526,7 +527,7 @@ static void calculate_reward_and_process_outcome( float timing_bb_factor, MoveGenerator& move_generator); -static void print_place_status_header(); +static void print_place_status_header(bool noc_enabled); static void print_place_status(const t_annealing_state& state, const t_placer_statistics& stats, @@ -534,7 +535,9 @@ static void print_place_status(const t_annealing_state& state, float cpd, float sTNS, float sWNS, - size_t tot_moves); + size_t tot_moves, + bool noc_enabled, + const NocCostTerms& noc_cost_terms); static void print_resources_utilization(); @@ -577,7 +580,7 @@ void try_place(const Netlist<>& net_list, auto& timing_ctx = g_vpr_ctx.timing(); auto pre_place_timing_stats = timing_ctx.stats; - int tot_iter, moves_since_cost_recompute, width_fac, num_connections, + int tot_iter, moves_since_cost_recompute, num_connections, outer_crit_iter_count, inner_recompute_limit; float first_crit_exponent, first_rlim, first_t; int first_move_lim; @@ -592,7 +595,6 @@ void try_place(const Netlist<>& net_list, t_placer_statistics stats; t_placement_checkpoint placement_checkpoint; - t_graph_type graph_directionality; std::shared_ptr timing_info; std::shared_ptr placement_delay_calc; @@ -649,20 +651,14 @@ void try_place(const Netlist<>& net_list, //create the move generator based on the chosen strategy create_move_generators(move_generator, move_generator2, placer_opts, move_lim); - width_fac = placer_opts.place_chan_width; - - if (router_opts.route_type == GLOBAL) { - graph_directionality = GRAPH_BIDIR; - } else { - graph_directionality = (det_routing_arch->directionality == BI_DIRECTIONAL ? 
GRAPH_BIDIR : GRAPH_UNIDIR); - } - - init_chan(width_fac, chan_width_dist, graph_directionality); - alloc_and_load_placement_structs(placer_opts.place_cost_exp, placer_opts, noc_opts, directs, num_directs); vtr::ScopedStartFinishTimer timer("Placement"); + if (noc_opts.noc) { + normalize_noc_cost_weighting_factor(const_cast(noc_opts)); + } + initial_placement(placer_opts, placer_opts.constraints_file.c_str(), noc_opts); @@ -691,6 +687,7 @@ void try_place(const Netlist<>& net_list, place_sync_external_block_connections(block_id); } + const int width_fac = placer_opts.place_chan_width; init_draw_coords((float)width_fac); /* Allocated here because it goes into timing critical code where each memory allocation is expensive */ @@ -807,8 +804,9 @@ void try_place(const Netlist<>& net_list, if (noc_opts.noc) { // get the costs associated with the NoC - costs.noc_aggregate_bandwidth_cost = comp_noc_aggregate_bandwidth_cost(); - costs.noc_latency_cost = comp_noc_latency_cost(noc_opts); + costs.noc_cost_terms.aggregate_bandwidth = comp_noc_aggregate_bandwidth_cost(); + std::tie(costs.noc_cost_terms.latency, costs.noc_cost_terms.latency_overrun) = comp_noc_latency_cost(); + costs.noc_cost_terms.congestion = comp_noc_congestion_cost(); // initialize all the noc normalization factors update_noc_normalization_factors(costs); @@ -828,7 +826,23 @@ void try_place(const Netlist<>& net_list, VTR_LOG("Initial placement cost: %g bb_cost: %g td_cost: %g\n", costs.cost, costs.bb_cost, costs.timing_cost); if (noc_opts.noc) { - VTR_LOG("Initial noc placement costs. noc_aggregate_bandwidth_cost: %g, noc_latency_cost: %g, \n", costs.noc_aggregate_bandwidth_cost, costs.noc_latency_cost); + VTR_LOG("NoC Placement Costs. " + "cost: %g, " + "aggregate_bandwidth_cost: %g, " + "latency_cost: %g, " + "n_met_latency_constraints: %d, " + "latency_overrun_cost: %g, " + "congestion_cost: %g, " + "accum_congested_ratio: %g, " + "n_congested_links: %d \n", + calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts), + costs.noc_cost_terms.aggregate_bandwidth, + costs.noc_cost_terms.latency, + get_number_of_traffic_flows_with_latency_cons_met(), + costs.noc_cost_terms.latency_overrun, + costs.noc_cost_terms.congestion, + get_total_congestion_bandwidth_ratio(), + get_number_of_congested_noc_links()); } if (placer_opts.place_algorithm.is_timing_driven()) { VTR_LOG( @@ -862,7 +876,26 @@ void try_place(const Netlist<>& net_list, costs.cost, costs.bb_cost, costs.timing_cost, width_fac); if (noc_opts.noc) { sprintf(msg, - "\nInitial noc placement costs. noc_aggregate_bandwidth_cost: %g noc_latency_cost: %g ", costs.noc_aggregate_bandwidth_cost, costs.noc_latency_cost); + "\nInitial NoC Placement Costs. 
" + "cost: %g, " + "aggregate_bandwidth_cost: %g, " + "latency_cost: %g, " + "n_met_latency_constraints: %d, " + "latency_overrun_cost: %g, " + "congestion_cost: %g, " + "accum_congested_ratio: %g, " + "n_congested_links: %d \n", + calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts), + costs.noc_cost_terms.aggregate_bandwidth, + costs.noc_cost_terms.latency, + get_number_of_traffic_flows_with_latency_cons_met(), + costs.noc_cost_terms.latency_overrun, + costs.noc_cost_terms.congestion, + get_total_congestion_bandwidth_ratio(), + get_number_of_congested_noc_links()); + + + } //Draw the initial placement update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info); @@ -959,7 +992,7 @@ void try_place(const Netlist<>& net_list, if (skip_anneal == false) { //Table header VTR_LOG("\n"); - print_place_status_header(); + print_place_status_header(noc_opts.noc); /* Outer loop of the simulated annealing begins */ do { @@ -985,7 +1018,7 @@ void try_place(const Netlist<>& net_list, } } - //move the appropoiate move_generator to be the current used move generator + //move the appropriate move_generator to be the current used move generator assign_current_move_generator(move_generator, move_generator2, agent_state, placer_opts, false, current_move_generator); @@ -1008,7 +1041,8 @@ void try_place(const Netlist<>& net_list, ++state.num_temps; print_place_status(state, stats, temperature_timer.elapsed_sec(), - critical_path.delay(), sTNS, sWNS, tot_iter); + critical_path.delay(), sTNS, sWNS, tot_iter, + noc_opts.noc, costs.noc_cost_terms); if (placer_opts.place_algorithm.is_timing_driven() && placer_opts.place_agent_multistate @@ -1079,7 +1113,8 @@ void try_place(const Netlist<>& net_list, } print_place_status(state, stats, temperature_timer.elapsed_sec(), - critical_path.delay(), sTNS, sWNS, tot_iter); + critical_path.delay(), sTNS, sWNS, tot_iter, + noc_opts.noc, costs.noc_cost_terms); } auto post_quench_timing_stats = timing_ctx.stats; @@ -1171,8 +1206,41 @@ void try_place(const Netlist<>& net_list, // print the noc costs info if (noc_opts.noc) { sprintf(msg, - "\nNoC Placement Costs. noc_aggregate_bandwidth_cost: %g noc_latency_cost: %g noc_latency_constraints_cost: %d", costs.noc_aggregate_bandwidth_cost, costs.noc_latency_cost, get_number_of_traffic_flows_with_latency_cons_met()); - VTR_LOG("NoC Placement Costs. noc_aggregate_bandwidth_cost: %g, noc_latency_cost: %g, noc_latency_constraints_cost: %d, \n", costs.noc_aggregate_bandwidth_cost, costs.noc_latency_cost, get_number_of_traffic_flows_with_latency_cons_met()); + "\nNoC Placement Costs. " + "cost: %g, " + "aggregate_bandwidth_cost: %g, " + "latency_cost: %g, " + "n_met_latency_constraints: %d, " + "latency_overrun_cost: %g, " + "congestion_cost: %g, " + "accum_congested_ratio: %g, " + "n_congested_links: %d \n", + calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts), + costs.noc_cost_terms.aggregate_bandwidth, + costs.noc_cost_terms.latency, + get_number_of_traffic_flows_with_latency_cons_met(), + costs.noc_cost_terms.latency_overrun, + costs.noc_cost_terms.congestion, + get_total_congestion_bandwidth_ratio(), + get_number_of_congested_noc_links()); + + VTR_LOG("\nNoC Placement Costs. 
" + "cost: %g, " + "aggregate_bandwidth_cost: %g, " + "latency_cost: %g, " + "n_met_latency_constraints: %d, " + "latency_overrun_cost: %g, " + "congestion_cost: %g, " + "accum_congested_ratio: %g, " + "n_congested_links: %d \n", + calculate_noc_cost(costs.noc_cost_terms, costs.noc_cost_norm_factors, noc_opts), + costs.noc_cost_terms.aggregate_bandwidth, + costs.noc_cost_terms.latency, + get_number_of_traffic_flows_with_latency_cons_met(), + costs.noc_cost_terms.latency_overrun, + costs.noc_cost_terms.congestion, + get_total_congestion_bandwidth_ratio(), + get_number_of_congested_noc_links()); } update_screen(ScreenUpdatePriority::MAJOR, msg, PLACEMENT, timing_info); // Print out swap statistics @@ -1357,64 +1425,63 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const PlaceDelayModel* delay_model, const PlacerCriticalities* criticalities, t_placer_costs* costs) { + auto check_and_print_cost = [](double new_cost, + double old_cost, + const std::string& cost_name) { + if (!vtr::isclose(new_cost, old_cost, ERROR_TOL, 0.)) { + std::string msg = vtr::string_fmt( + "in recompute_costs_from_scratch: new_%s = %g, old %s = %g, ERROR_TOL = %g\n", + cost_name.c_str(), new_cost, cost_name.c_str(), old_cost, ERROR_TOL); + VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); + } + }; + double new_bb_cost = recompute_bb_cost(); - if (fabs(new_bb_cost - costs->bb_cost) > costs->bb_cost * ERROR_TOL) { - std::string msg = vtr::string_fmt( - "in recompute_costs_from_scratch: new_bb_cost = %g, old bb_cost = %g\n", - new_bb_cost, costs->bb_cost); - VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); - } + check_and_print_cost(new_bb_cost, costs->bb_cost, "bb_cost"); costs->bb_cost = new_bb_cost; if (placer_opts.place_algorithm.is_timing_driven()) { double new_timing_cost = 0.; comp_td_costs(delay_model, *criticalities, &new_timing_cost); - if (fabs( - new_timing_cost - - costs->timing_cost) - > costs->timing_cost * ERROR_TOL) { - std::string msg = vtr::string_fmt( - "in recompute_costs_from_scratch: new_timing_cost = %g, old timing_cost = %g, ERROR_TOL = %g\n", - new_timing_cost, costs->timing_cost, ERROR_TOL); - VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); - } + check_and_print_cost(new_timing_cost, costs->timing_cost, "timing_cost"); costs->timing_cost = new_timing_cost; } else { VTR_ASSERT(placer_opts.place_algorithm == BOUNDING_BOX_PLACE); - costs->cost = new_bb_cost * costs->bb_cost_norm; } if (noc_opts.noc) { - double new_noc_aggregate_bandwidth_cost = 0.; - double new_noc_latency_cost = 0.; - recompute_noc_costs(new_noc_aggregate_bandwidth_cost, new_noc_latency_cost); + NocCostTerms new_noc_cost; + recompute_noc_costs(new_noc_cost); - if (fabs( - new_noc_aggregate_bandwidth_cost - - costs->noc_aggregate_bandwidth_cost) - > costs->noc_aggregate_bandwidth_cost * ERROR_TOL) { - std::string msg = vtr::string_fmt( - "in recompute_costs_from_scratch: new_noc_aggregate_bandwidth_cost = %g, old noc_aggregate_bandwidth_cost = %g, ERROR_TOL = %g\n", - new_noc_aggregate_bandwidth_cost, costs->noc_aggregate_bandwidth_cost, ERROR_TOL); - VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); - } - costs->noc_aggregate_bandwidth_cost = new_noc_aggregate_bandwidth_cost; + check_and_print_cost(new_noc_cost.aggregate_bandwidth, + costs->noc_cost_terms.aggregate_bandwidth, + "noc_aggregate_bandwidth"); + costs->noc_cost_terms.aggregate_bandwidth = new_noc_cost.aggregate_bandwidth; // only check if the recomputed cost and the current noc latency cost are within the error tolerance if the cost is above 1 picosecond. 
// Otherwise, there is no need to check (we expect the latency cost to be above the threshold of 1 picosecond) - if (new_noc_latency_cost > MIN_EXPECTED_NOC_LATENCY_COST) { - if (fabs( - new_noc_latency_cost - - costs->noc_latency_cost) - > costs->noc_latency_cost * ERROR_TOL) { - std::string msg = vtr::string_fmt( - "in recompute_costs_from_scratch: new_noc_latency_cost = %g, old noc_latency_cost = %g, ERROR_TOL = %g\n", - new_noc_latency_cost, costs->noc_latency_cost, ERROR_TOL); - VPR_ERROR(VPR_ERROR_PLACE, msg.c_str()); - } + if (new_noc_cost.latency > MIN_EXPECTED_NOC_LATENCY_COST) { + check_and_print_cost(new_noc_cost.latency, + costs->noc_cost_terms.latency, + "noc_latency_cost"); } - costs->noc_latency_cost = new_noc_latency_cost; + costs->noc_cost_terms.latency = new_noc_cost.latency; + + if (new_noc_cost.latency_overrun > MIN_EXPECTED_NOC_LATENCY_COST) { + check_and_print_cost(new_noc_cost.latency_overrun, + costs->noc_cost_terms.latency_overrun, + "noc_latency_overrun_cost"); + } + costs->noc_cost_terms.latency_overrun = new_noc_cost.latency_overrun; + + if (new_noc_cost.congestion > MIN_EXPECTED_NOC_CONGESTION_COST) { + check_and_print_cost(new_noc_cost.congestion, + costs->noc_cost_terms.congestion, + "noc_congestion_cost"); + } + costs->noc_cost_terms.congestion = new_noc_cost.congestion; + } } @@ -1604,7 +1671,6 @@ static e_move_result try_swap(const t_annealing_state* state, float rlim_escape_fraction = placer_opts.rlim_escape_fraction; float timing_tradeoff = placer_opts.timing_tradeoff; - double noc_placement_weighting = noc_opts.noc_placement_weighting; PlaceCritParams crit_params; crit_params.crit_exponent = state->crit_exponent; @@ -1752,14 +1818,14 @@ static e_move_result try_swap(const t_annealing_state* state, delta_c = bb_delta_c * costs->bb_cost_norm; } - double noc_aggregate_bandwidth_delta_c = 0; // change in the NoC aggregate bandwidth cost - double noc_latency_delta_c = 0; // change in the NoC latency cost + + NocCostTerms noc_delta_c; // change in NoC cost /* Update the NoC datastructure and costs*/ if (noc_opts.noc) { - find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_aggregate_bandwidth_delta_c, noc_latency_delta_c, noc_opts); + find_affected_noc_routers_and_update_noc_costs(blocks_affected, noc_delta_c); // Include the NoC delta costs in the total cost change for this swap - delta_c = delta_c + noc_placement_weighting * (noc_latency_delta_c * costs->noc_latency_cost_norm + noc_aggregate_bandwidth_delta_c * costs->noc_aggregate_bandwidth_cost_norm); + delta_c += calculate_noc_cost(noc_delta_c, costs->noc_cost_norm_factors, noc_opts); } /* 1 -> move accepted, 0 -> rejected. */ @@ -1811,9 +1877,7 @@ static e_move_result try_swap(const t_annealing_state* state, } if (noc_opts.noc) { commit_noc_costs(); - - costs->noc_aggregate_bandwidth_cost += noc_aggregate_bandwidth_delta_c; - costs->noc_latency_cost += noc_latency_delta_c; + *costs += noc_delta_c; } //Highlights the new block when manual move is selected. 
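For illustration, here is a minimal self-contained C++ sketch of how a swap's NoC cost change is normalized, weighted, and folded into the running totals, mirroring the calculate_noc_cost()/operator+= logic in this patch. The struct, helper, and numeric values below are simplified stand-ins chosen only for this example; they are not the actual VTR types or production values.

#include <cstdio>

// Simplified stand-in for the NocCostTerms struct introduced by this patch;
// it exists only to keep this sketch self-contained.
struct CostTerms {
    double aggregate_bandwidth = 0.0;
    double latency = 0.0;
    double latency_overrun = 0.0;
    double congestion = 0.0;
};

// Mirrors the idea of calculate_noc_cost(): every raw term is scaled by its
// normalization factor and its weighting factor, and the weighted sum is then
// scaled by the overall NoC placement weighting.
double noc_cost(const CostTerms& t, const CostTerms& norm, double placement_w,
                double bw_w, double lat_w, double overrun_w, double cong_w) {
    return placement_w * (t.aggregate_bandwidth * norm.aggregate_bandwidth * bw_w
                          + t.latency * norm.latency * lat_w
                          + t.latency_overrun * norm.latency_overrun * overrun_w
                          + t.congestion * norm.congestion * cong_w);
}

int main() {
    // Made-up numbers: a swap that adds some link bandwidth and congestion.
    CostTerms delta{1.0e6, 2.0e-9, 0.0, 0.05};
    // Normalization factors are roughly 1/current_total of each term (capped elsewhere).
    CostTerms norm{1.0e-8, 1.0e9, 1.0e12, 10.0};

    // The aggregate bandwidth weight is whatever the other three weights leave of 1.0.
    double lat_w = 0.1, overrun_w = 0.3, cong_w = 0.2;
    double bw_w = 1.0 - (lat_w + overrun_w + cong_w);

    double delta_c = noc_cost(delta, norm, /*placement_w=*/5.0, bw_w, lat_w, overrun_w, cong_w);
    std::printf("NoC contribution to the swap's delta cost: %g\n", delta_c);

    // If the swap is accepted, the raw terms are accumulated, analogous to
    // '*costs += noc_delta_c;' in the patch.
    CostTerms totals{2.0e8, 1.0e-6, 0.0, 0.1};
    totals.aggregate_bandwidth += delta.aggregate_bandwidth;
    totals.latency += delta.latency;
    totals.latency_overrun += delta.latency_overrun;
    totals.congestion += delta.congestion;
    std::printf("new aggregate bandwidth total: %g\n", totals.aggregate_bandwidth);
    return 0;
}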
@@ -2282,7 +2346,7 @@ static double get_total_cost(t_placer_costs* costs, const t_placer_opts& placer_ if (noc_opts.noc) { // in noc mode we include noc aggregate bandwidth and noc latency - total_cost += calculate_noc_cost(*costs, noc_opts); + total_cost += calculate_noc_cost(costs->noc_cost_terms, costs->noc_cost_norm_factors, noc_opts); } return total_cost; @@ -4162,15 +4226,27 @@ static void update_screen_debug() { } #endif -static void print_place_status_header() { - VTR_LOG( - "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n"); - VTR_LOG( - "Tnum Time T Av Cost Av BB Cost Av TD Cost CPD sTNS sWNS Ac Rate Std Dev R lim Crit Exp Tot Moves Alpha\n"); - VTR_LOG( - " (sec) (ns) (ns) (ns) \n"); - VTR_LOG( - "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n"); +static void print_place_status_header(bool noc_enabled) { + if (!noc_enabled) { + VTR_LOG( + "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n"); + VTR_LOG( + "Tnum Time T Av Cost Av BB Cost Av TD Cost CPD sTNS sWNS Ac Rate Std Dev R lim Crit Exp Tot Moves Alpha\n"); + VTR_LOG( + " (sec) (ns) (ns) (ns) \n"); + VTR_LOG( + "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------\n"); + } else { + VTR_LOG( + "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n"); + VTR_LOG( + "Tnum Time T Av Cost Av BB Cost Av TD Cost CPD sTNS sWNS Ac Rate Std Dev R lim Crit Exp Tot Moves Alpha Agg. BW Agg. Lat Lat Over. NoC Cong.\n"); + VTR_LOG( + " (sec) (ns) (ns) (ns) (bps) (ns) (ns) \n"); + VTR_LOG( + "---- ------ ------- ------- ---------- ---------- ------- ---------- -------- ------- ------- ------ -------- --------- ------ -------- -------- --------- ---------\n"); + } + } static void print_place_status(const t_annealing_state& state, @@ -4179,22 +4255,32 @@ static void print_place_status(const t_annealing_state& state, float cpd, float sTNS, float sWNS, - size_t tot_moves) { + size_t tot_moves, + bool noc_enabled, + const NocCostTerms& noc_cost_terms) { VTR_LOG( - "%4zu " - "%6.1f " - "%7.1e " + "%4zu %6.1f %7.1e " "%7.3f %10.2f %-10.5g " "%7.3f % 10.3g % 8.3f " "%7.3f %7.4f %6.1f %8.2f", state.num_temps, elapsed_sec, state.t, - stats.av_cost, stats.av_bb_cost, stats.av_timing_cost, 1e9 * cpd, - 1e9 * sTNS, 1e9 * sWNS, stats.success_rate, stats.std_dev, - state.rlim, state.crit_exponent); + stats.av_cost, stats.av_bb_cost, stats.av_timing_cost, + 1e9 * cpd, 1e9 * sTNS, 1e9 * sWNS, + stats.success_rate, stats.std_dev, state.rlim, state.crit_exponent); pretty_print_uint(" ", tot_moves, 9, 3); - VTR_LOG(" %6.3f\n", state.alpha); + VTR_LOG(" %6.3f", state.alpha); + + if (noc_enabled) { + VTR_LOG( + " %7.2e %7.2e" + " %8.2e %8.2f", + noc_cost_terms.aggregate_bandwidth, noc_cost_terms.latency, + noc_cost_terms.latency_overrun, noc_cost_terms.congestion); + } + + VTR_LOG("\n"); fflush(stdout); } diff --git a/vpr/src/place/place_checkpoint.cpp b/vpr/src/place/place_checkpoint.cpp index dd9b9a0d9f1..73d49e6e80c 100644 --- a/vpr/src/place/place_checkpoint.cpp +++ b/vpr/src/place/place_checkpoint.cpp @@ -63,7 +63,7 @@ void restore_best_placement(t_placement_checkpoint& placement_checkpoint, * and need to be re-computed from scratch. 
*/ if (noc_opts.noc) { - reinitialize_noc_routing(noc_opts, costs); + reinitialize_noc_routing(costs); } VTR_LOG("\nCheckpoint restored\n"); diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 75ff2d2bf12..6c7f506ee3e 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -9,17 +9,14 @@ #include "draw_global.h" #include "place_constraints.h" -/* File-scope routines */ -static GridBlock init_grid_blocks(); - /** - * @brief Initialize the placer's block-grid dual direction mapping. - * - * Forward direction - block to grid: place_ctx.block_locs. - * Reverse direction - grid to block: place_ctx.grid_blocks. + * @brief Initialize `grid_blocks`, the inverse structure of `block_locs`. * - * Initialize both of them to empty states. + * The container at each grid block location should have a length equal to the + * subtile capacity of that block. Unused subtile would be marked EMPTY_BLOCK_ID. */ +static GridBlock init_grid_blocks(); + void init_placement_context() { auto& place_ctx = g_vpr_ctx.mutable_placement(); auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -32,12 +29,6 @@ void init_placement_context() { place_ctx.grid_blocks = init_grid_blocks(); } -/** - * @brief Initialize `grid_blocks`, the inverse structure of `block_locs`. - * - * The container at each grid block location should have a length equal to the - * subtile capacity of that block. Unused subtile would be marked EMPTY_BLOCK_ID. - */ static GridBlock init_grid_blocks() { auto& device_ctx = g_vpr_ctx.device(); int num_layers = device_ctx.grid.get_num_layers(); @@ -56,12 +47,6 @@ static GridBlock init_grid_blocks() { return grid_blocks; } -/** - * @brief Mutator: updates the norm factors in the outer loop iteration. - * - * At each temperature change we update these values to be used - * for normalizing the trade-off between timing and wirelength (bb) - */ void t_placer_costs::update_norm_factors() { if (place_algorithm.is_timing_driven()) { bb_cost_norm = 1 / bb_cost; @@ -73,6 +58,12 @@ void t_placer_costs::update_norm_factors() { } } +t_placer_costs& t_placer_costs::operator+=(const NocCostTerms& noc_delta_cost) { + noc_cost_terms += noc_delta_cost; + + return *this; +} + ///@brief Constructor: Initialize all annealing state variables and macros. t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched, float first_t, @@ -105,20 +96,6 @@ t_annealing_state::t_annealing_state(const t_annealing_sched& annealing_sched, UPPER_RLIM = std::max(grid.width() - 1, grid.height() - 1); } -/** - * @brief Get the initial limit for inner loop block move attempt limit. - * - * There are two ways to scale the move limit. - * e_place_effort_scaling::CIRCUIT - * scales the move limit proportional to num_blocks ^ (4/3) - * e_place_effort_scaling::DEVICE_CIRCUIT - * scales the move limit proportional to device_size ^ (2/3) * num_blocks ^ (2/3) - * - * The second method is almost identical to the first one when the device - * is highly utilized (device_size ~ num_blocks). For low utilization devices - * (device_size >> num_blocks), the search space is larger, so the second method - * performs more moves to ensure better optimization. 
- */ int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched) { const auto& device_ctx = g_vpr_ctx.device(); const auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -142,23 +119,13 @@ int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sch return move_lim; } -/** - * @brief Update the annealing state according to the annealing schedule selected. - * - * USER_SCHED: A manual fixed schedule with fixed alpha and exit criteria. - * AUTO_SCHED: A more sophisticated schedule where alpha varies based on success ratio. - * DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio. - * See doc/src/vpr/dusty_sa.rst for more details. - * - * @return True->continues the annealing. False->exits the annealing. - */ bool t_annealing_state::outer_loop_update(float success_rate, const t_placer_costs& costs, const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched) { #ifndef NO_GRAPHICS t_draw_state* draw_state = get_draw_state_vars(); - if (draw_state->list_of_breakpoints.size() != 0) { + if (!draw_state->list_of_breakpoints.empty()) { /* Update temperature in the current information variable. */ get_bp_state_globals()->get_glob_breakpoint_state()->temp_count++; } @@ -237,33 +204,12 @@ bool t_annealing_state::outer_loop_update(float success_rate, return true; } -/** - * @brief Update the range limiter to keep acceptance prob. near 0.44. - * - * Use a floating point rlim to allow gradual transitions at low temps. - * The range is bounded by 1 (FINAL_RLIM) and the grid size (UPPER_RLIM). - */ void t_annealing_state::update_rlim(float success_rate) { rlim *= (1. - 0.44 + success_rate); rlim = std::min(rlim, UPPER_RLIM); rlim = std::max(rlim, FINAL_RLIM); } -/** - * @brief Update the criticality exponent. - * - * When rlim shrinks towards the FINAL_RLIM value (indicating - * that we are fine-tuning a more optimized placement), we can - * focus more on a smaller number of critical connections. - * To achieve this, we make the crit_exponent sharper, so that - * critical connections would become more critical than before. - * - * We calculate how close rlim is to its final value comparing - * to its initial value. Then, we apply the same scaling factor - * on the crit_exponent so that it lands on the suitable value - * between td_place_exp_first and td_place_exp_last. The scaling - * factor is calculated and applied linearly. - */ void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) { /* If rlim == FINAL_RLIM, then scale == 0. */ float scale = 1 - (rlim - FINAL_RLIM) * INVERSE_DELTA_RLIM; @@ -273,11 +219,6 @@ void t_annealing_state::update_crit_exponent(const t_placer_opts& placer_opts) { + placer_opts.td_place_exp_first; } -/** - * @brief Update the move limit based on the success rate. - * - * The value is bounded between 1 and move_lim_max. - */ void t_annealing_state::update_move_lim(float success_target, float success_rate) { move_lim = move_lim_max * (success_target / success_rate); move_lim = std::min(move_lim, move_lim_max); @@ -319,13 +260,6 @@ void t_placer_statistics::calc_iteration_stats(const t_placer_costs& costs, int std_dev = get_std_dev(success_sum, sum_of_squares, av_cost); } -/** - * @brief Returns the standard deviation of data set x. - * - * There are n sample points, sum_x_squared is the summation over n of x^2 and av_x - * is the average x. All operations are done in double precision, since round off - * error can be a problem in the initial temp. 
std_dev calculation for big circuits. - */ double get_std_dev(int n, double sum_x_squared, double av_x) { double std_dev; if (n <= 1) { @@ -371,7 +305,7 @@ void zero_initialize_grid_blocks() { place_ctx.grid_blocks.set_usage({i, j, layer_num}, 0); auto tile = device_ctx.grid.get_physical_type({i, j, layer_num}); - for (auto sub_tile : tile->sub_tiles) { + for (const auto& sub_tile : tile->sub_tiles) { auto capacity = sub_tile.capacity; for (int k = 0; k < capacity.total(); k++) { @@ -385,15 +319,6 @@ void zero_initialize_grid_blocks() { } } -/** - * @brief Builds (alloc and load) legal_pos that holds all the legal locations for placement - * - * @param legal_pos - * a lookup of all subtiles by sub_tile type - * legal_pos[0..device_ctx.num_block_types-1][0..num_sub_tiles - 1] = std::vector of all the legal locations - * of the proper tile type and sub_tile type - * - */ void alloc_and_load_legal_placement_locations(std::vector>>& legal_pos) { auto& device_ctx = g_vpr_ctx.device(); auto& place_ctx = g_vpr_ctx.placement(); @@ -420,7 +345,7 @@ void alloc_and_load_legal_placement_locations(std::vectorindex; int isub_tile = sub_tile.index; @@ -504,7 +429,7 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_ /* * analytical placement approach do not need to make sure whether location could accommodate more blocks - * since overused locations will be spreaded by legalizer afterward. + * since overused locations will be spread by legalizer afterward. * floorplan constraint is not supported by analytical placement yet, * hence, if macro_can_be_placed is called from analytical placer, no further actions are required. */ @@ -547,3 +472,25 @@ bool macro_can_be_placed(t_pl_macro pl_macro, t_pl_loc head_pos, bool check_all_ return (mac_can_be_placed); } + +NocCostTerms::NocCostTerms(double agg_bw, double lat, double lat_overrun, double congest) + : aggregate_bandwidth(agg_bw) + , latency(lat) + , latency_overrun(lat_overrun) + , congestion(congest) {} + +NocCostTerms::NocCostTerms() + : aggregate_bandwidth(0) + , latency(0) + , latency_overrun(0) + , congestion(0) {} + +NocCostTerms& NocCostTerms::operator+=(const NocCostTerms& noc_delta_cost) { + aggregate_bandwidth += noc_delta_cost.aggregate_bandwidth; + latency += noc_delta_cost.latency; + latency_overrun += noc_delta_cost.latency_overrun; + congestion += noc_delta_cost.congestion; + + return *this; +} + diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index cc903cf4f71..12bd6ce745b 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -12,6 +12,39 @@ #include "vtr_vector_map.h" #include "globals.h" +// forward declaration of t_placer_costs so that it can be used an argument +// in NocCostTerms constructor +class t_placer_costs; + +/** + * @brief Data structure that stores different cost terms for NoC placement. + * This data structure can also be used to store normalization and weighting + * factors for NoC-related cost terms. + * + * @param aggregate_bandwidth The aggregate NoC bandwidth cost. This is + * computed by summing all used link bandwidths. + * @param latency The NoC latency cost, calculated as the sum of latencies + * experienced by each traffic flow. + * @param latency_overrun Sum of latency overrun for traffic flows that have + * a latency constraint. + * @param congestion The NoC congestion cost, i.e. how over-utilized + * NoC links are. This is computed by dividing over-utilized bandwidth + * by link bandwidth, and summing all computed ratios. 
+ */ +struct NocCostTerms { + public: + NocCostTerms(); + NocCostTerms(const NocCostTerms&) = default; + NocCostTerms(double agg_bw, double lat, double lat_overrun, double congest); + NocCostTerms& operator=(const NocCostTerms& other) = default; + NocCostTerms& operator+=(const NocCostTerms& noc_delta_cost); + + double aggregate_bandwidth = 0.0; + double latency = 0.0; + double latency_overrun = 0.0; + double congestion = 0.0; +}; + /** * @brief Data structure that stores different cost values in the placer. * @@ -34,11 +67,14 @@ * @param timing_cost_norm The normalization factor for the timing cost, which * is upper-bounded by the value of MAX_INV_TIMING_COST. * + * @param noc_cost_terms NoC-related cost terms + * @param noc_cost_norm_factors Normalization factors for NoC-related cost terms. + * * @param MAX_INV_TIMING_COST Stops inverse timing cost from going to infinity * with very lax timing constraints, which avoids multiplying by a * gigantic timing_cost_norm when auto-normalizing. The exact value * of this cost has relatively little impact, but should be large - * enough to not affect the timing costs computatation for normal + * enough to not affect the timing costs computation for normal * constraints. * * @param place_algorithm Determines how the member values are updated upon @@ -51,19 +87,31 @@ class t_placer_costs { double timing_cost = 0.; double bb_cost_norm = 0.; double timing_cost_norm = 0.; - double noc_aggregate_bandwidth_cost = 0.; - double noc_aggregate_bandwidth_cost_norm = 0.; - double noc_latency_cost = 0.; - double noc_latency_cost_norm = 0.; + + NocCostTerms noc_cost_terms; + NocCostTerms noc_cost_norm_factors; public: //Constructor - t_placer_costs(t_place_algorithm algo) + explicit t_placer_costs(t_place_algorithm algo) : place_algorithm(algo) {} - t_placer_costs() {} + t_placer_costs() = default; public: //Mutator + /** + * @brief Mutator: updates the norm factors in the outer loop iteration. + * + * At each temperature change we update these values to be used + * for normalizing the trade-off between timing and wirelength (bb) + */ void update_norm_factors(); + /** + * @brief Accumulates NoC cost difference terms + * + * @param noc_delta_cost Cost difference for NoC-related costs terms + */ + t_placer_costs& operator+=(const NocCostTerms& noc_delta_cost); + private: double MAX_INV_TIMING_COST = 1.e12; t_place_algorithm place_algorithm; @@ -144,14 +192,52 @@ class t_annealing_state { int num_layers); public: //Mutator + /** + * @brief Update the annealing state according to the annealing schedule selected. + * + * USER_SCHED: A manual fixed schedule with fixed alpha and exit criteria. + * AUTO_SCHED: A more sophisticated schedule where alpha varies based on success ratio. + * DUSTY_SCHED: This schedule jumps backward and slows down in response to success ratio. + * See doc/src/vpr/dusty_sa.rst for more details. + * + * @return True->continues the annealing. False->exits the annealing. + */ bool outer_loop_update(float success_rate, const t_placer_costs& costs, const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched); private: //Mutator + /** + * @brief Update the range limiter to keep acceptance prob. near 0.44. + * + * Use a floating point rlim to allow gradual transitions at low temps. + * The range is bounded by 1 (FINAL_RLIM) and the grid size (UPPER_RLIM). + */ inline void update_rlim(float success_rate); + + /** + * @brief Update the criticality exponent. 
+ * + * When rlim shrinks towards the FINAL_RLIM value (indicating + * that we are fine-tuning a more optimized placement), we can + * focus more on a smaller number of critical connections. + * To achieve this, we make the crit_exponent sharper, so that + * critical connections would become more critical than before. + * + * We calculate how close rlim is to its final value comparing + * to its initial value. Then, we apply the same scaling factor + * on the crit_exponent so that it lands on the suitable value + * between td_place_exp_first and td_place_exp_last. The scaling + * factor is calculated and applied linearly. + */ inline void update_crit_exponent(const t_placer_opts& placer_opts); + + /** + * @brief Update the move limit based on the success rate. + * + * The value is bounded between 1 and move_lim_max. + */ inline void update_move_lim(float success_target, float success_rate); }; @@ -209,13 +295,39 @@ class t_placer_statistics { void single_swap_update(const t_placer_costs& costs); }; -///@brief Initialize the placer's block-grid dual direction mapping. +/** + * @brief Initialize the placer's block-grid dual direction mapping. + * + * Forward direction - block to grid: place_ctx.block_locs. + * Reverse direction - grid to block: place_ctx.grid_blocks. + * + * Initialize both of them to empty states. + */ void init_placement_context(); -///@brief Get the initial limit for inner loop block move attempt limit. +/** + * @brief Get the initial limit for inner loop block move attempt limit. + * + * There are two ways to scale the move limit. + * e_place_effort_scaling::CIRCUIT + * scales the move limit proportional to num_blocks ^ (4/3) + * e_place_effort_scaling::DEVICE_CIRCUIT + * scales the move limit proportional to device_size ^ (2/3) * num_blocks ^ (2/3) + * + * The second method is almost identical to the first one when the device + * is highly utilized (device_size ~ num_blocks). For low utilization devices + * (device_size >> num_blocks), the search space is larger, so the second method + * performs more moves to ensure better optimization. + */ int get_initial_move_lim(const t_placer_opts& placer_opts, const t_annealing_sched& annealing_sched); -///@brief Returns the standard deviation of data set x. +/** + * @brief Returns the standard deviation of data set x. + * + * There are n sample points, sum_x_squared is the summation over n of x^2 and av_x + * is the average x. All operations are done in double precision, since round off + * error can be a problem in the initial temp. std_dev calculation for big circuits. + */ double get_std_dev(int n, double sum_x_squared, double av_x); ///@brief Initialize usage to 0 and blockID to EMPTY_BLOCK_ID for all place_ctx.grid_block locations @@ -224,7 +336,15 @@ void zero_initialize_grid_blocks(); ///@brief a utility to calculate grid_blocks given the updated block_locs (used in restore_checkpoint) void load_grid_blocks_from_block_locs(); -///@brief Builds legal_pos structure. legal_pos[type->index] is an array that gives every legal value of (x,y,z) that can accommodate a block. 
+/** + * @brief Builds (alloc and load) legal_pos that holds all the legal locations for placement + * + * @param legal_pos + * a lookup of all subtiles by sub_tile type + * legal_pos[0..device_ctx.num_block_types-1][0..num_sub_tiles - 1] = std::vector of all the legal locations + * of the proper tile type and sub_tile type + * + */ void alloc_and_load_legal_placement_locations(std::vector>>& legal_pos); ///@brief Performs error checking to see if location is legal for block type, and sets the location and grid usage of the block if it is legal. diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index 5819485e254..db422bea509 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include "vtr_assert.h" #include "vtr_log.h" @@ -17,7 +18,6 @@ #include "vpr_utils.h" #include "cluster_placement.h" #include "place_macro.h" -#include "string.h" #include "pack_types.h" #include "device_grid.h" #include "timing_fail_error.h" @@ -181,7 +181,7 @@ void sync_grid_to_blocks() { } if (device_ctx.grid.get_width_offset({blk_x, blk_y, blk_layer}) != 0 || device_ctx.grid.get_height_offset({blk_x, blk_y, blk_layer}) != 0) { - VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Large block not aligned in placment for cluster_ctx.blocks %lu at (%d, %d, %d, %d).", + VPR_FATAL_ERROR(VPR_ERROR_PLACE, "Large block not aligned in placement for cluster_ctx.blocks %lu at (%d, %d, %d, %d).", size_t(blk_id), blk_x, blk_y, blk_z, blk_layer); } @@ -675,7 +675,7 @@ void get_pin_range_for_block(const ClusterBlockId blk_id, *pin_high = sub_tile.sub_tile_to_tile_pin_indices[rel_pin_high]; } -t_physical_tile_type_ptr find_tile_type_by_name(std::string name, const std::vector& types) { +t_physical_tile_type_ptr find_tile_type_by_name(const std::string& name, const std::vector& types) { for (auto const& type : types) { if (type.name == name) { return &type; @@ -814,7 +814,7 @@ t_physical_tile_type_ptr find_most_common_tile_type(const DeviceGrid& grid) { return max_type; } -InstPort parse_inst_port(std::string str) { +InstPort parse_inst_port(const std::string& str) { InstPort inst_port(str); auto& device_ctx = g_vpr_ctx.device(); @@ -1172,7 +1172,7 @@ t_pb_graph_pin* get_pb_graph_node_pin_from_block_pin(ClusterBlockId iblock, int return nullptr; } -const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, std::string port_name) { +const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, const std::string& port_name) { const t_pb_graph_pin* gpin = find_pb_graph_pin(pb_gnode, port_name, 0); if (gpin != nullptr) { @@ -1181,7 +1181,7 @@ const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, std::string po return nullptr; } -const t_pb_graph_pin* find_pb_graph_pin(const t_pb_graph_node* pb_gnode, std::string port_name, int index) { +const t_pb_graph_pin* find_pb_graph_pin(const t_pb_graph_node* pb_gnode, const std::string& port_name, int index) { for (int iport = 0; iport < pb_gnode->num_input_ports; iport++) { if (pb_gnode->num_input_pins[iport] < index) continue; @@ -2240,7 +2240,7 @@ void pretty_print_float(const char* prefix, double value, int num_digits, int sc } } -void print_timing_stats(std::string name, +void print_timing_stats(const std::string& name, const t_timing_analysis_profile_info& current, const t_timing_analysis_profile_info& past) { VTR_LOG("%s timing analysis took %g seconds (%g STA, %g slack) (%zu full updates: %zu setup, %zu hold, %zu combined).\n", diff --git a/vpr/src/util/vpr_utils.h 
b/vpr/src/util/vpr_utils.h index 2916506522b..a4c20feb61b 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -153,10 +153,10 @@ std::vector find_clb_pin_sink_atom_pins(ClusterBlockId clb, int logic std::tuple find_pb_route_clb_input_net_pin(ClusterBlockId clb, int sink_pb_route_id); //Returns the port matching name within pb_gnode -const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, std::string port_name); +const t_port* find_pb_graph_port(const t_pb_graph_node* pb_gnode, const std::string& port_name); //Returns the graph pin matching name at pin index -const t_pb_graph_pin* find_pb_graph_pin(const t_pb_graph_node* pb_gnode, std::string port_name, int index); +const t_pb_graph_pin* find_pb_graph_pin(const t_pb_graph_node* pb_gnode, const std::string& port_name, int index); AtomPinId find_atom_pin(ClusterBlockId blk_id, const t_pb_graph_pin* pb_gpin); @@ -168,7 +168,7 @@ t_physical_tile_type_ptr find_most_common_tile_type(const DeviceGrid& grid); //Parses a block_name.port[x:y] (e.g. LAB.data_in[3:10]) pin range specification, if no pin range is specified //looks-up the block port and fills in the full range -InstPort parse_inst_port(std::string str); +InstPort parse_inst_port(const std::string& str); //Returns the block type which is most likely the logic block t_logical_block_type_ptr infer_logic_block_type(const DeviceGrid& grid); @@ -250,7 +250,7 @@ int max_pins_per_grid_tile(); void pretty_print_uint(const char* prefix, size_t value, int num_digits, int scientific_precision); void pretty_print_float(const char* prefix, double value, int num_digits, int scientific_precision); -void print_timing_stats(std::string name, +void print_timing_stats(const std::string& name, const t_timing_analysis_profile_info& current, const t_timing_analysis_profile_info& past = t_timing_analysis_profile_info()); diff --git a/vpr/test/test_noc_place_utils.cpp b/vpr/test/test_noc_place_utils.cpp index c6ba3f89c10..8e53ec68ed9 100644 --- a/vpr/test/test_noc_place_utils.cpp +++ b/vpr/test/test_noc_place_utils.cpp @@ -33,13 +33,18 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { // start by deleting any global datastructures (this is so that we don't have corruption from previous tests) noc_ctx.noc_model.clear_noc(); noc_ctx.noc_traffic_flows_storage.clear_traffic_flows(); - delete noc_ctx.noc_flows_router; place_ctx.block_locs.clear(); // store the reference to device grid with // the grid width will be the size of the noc mesh noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0); + // set NoC link bandwidth + // dist_2 is used to generate traffic flow bandwidths. 
+ // Setting the NoC link bandwidth to max() / 5 makes link congestion more likely to happen + const double noc_link_bandwidth = dist_2.max() / 5; + noc_ctx.noc_model.set_noc_link_bandwidth(noc_link_bandwidth); + // individual router parameters int curr_router_id; int router_grid_position_x; @@ -129,7 +134,9 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { double traffic_flow_bandwidth_usage = (double)dist_2(rand_num_gen); // create and add the traffic flow - noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name, source_router_for_traffic_flow, sink_router_for_traffic_flow, traffic_flow_bandwidth_usage, traffic_flow_latency, traffic_flow_priority); + noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name, + source_router_for_traffic_flow, sink_router_for_traffic_flow, + traffic_flow_bandwidth_usage, traffic_flow_latency, traffic_flow_priority); number_of_created_traffic_flows++; @@ -142,11 +149,10 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { // now go and route all the traffic flows // // start by creating the routing algorithm - NocRouting* routing_algorithm_global = new XYRouting(); - noc_ctx.noc_flows_router = routing_algorithm_global; + noc_ctx.noc_flows_router = std::make_unique(); // create a local routing algorithm for the unit test - NocRouting* routing_algorithm = new XYRouting(); + auto routing_algorithm = std::make_unique(); for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) { const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number); @@ -187,13 +193,15 @@ TEST_CASE("test_initial_noc_placement", "[noc_place_utils]") { for (int link_number = 0; link_number < number_of_links; link_number++) { NocLinkId current_link_id = (NocLinkId)link_number; const NocLink& current_link = noc_ctx.noc_model.get_single_noc_link(current_link_id); + double golden_congested_bandwidth = std::max(golden_link_bandwidths[current_link_id] - noc_link_bandwidth, 0.0); + double golden_congested_bw_ratio = golden_congested_bandwidth / noc_link_bandwidth; REQUIRE(golden_link_bandwidths[current_link_id] == current_link.get_bandwidth_usage()); + REQUIRE(golden_congested_bandwidth == current_link.get_congested_bandwidth()); + REQUIRE(golden_congested_bw_ratio == current_link.get_congested_bandwidth_ratio()); } - - // delete the local routing algorithm - delete routing_algorithm; } + TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { // setup random number generation std::random_device device; @@ -213,13 +221,18 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { // start by deleting any global datastructures (this is so that we don't have corruption from previous tests) noc_ctx.noc_model.clear_noc(); noc_ctx.noc_traffic_flows_storage.clear_traffic_flows(); - delete noc_ctx.noc_flows_router; place_ctx.block_locs.clear(); // store the reference to device grid with // the grid width will be the size of the noc mesh noc_ctx.noc_model.set_device_grid_spec((int)MESH_TOPOLOGY_SIZE_NOC_PLACE_UTILS_TEST, 0); + // set NoC link bandwidth + // dist_2 is used to generate traffic flow bandwidths. 
+ // Setting the NoC link bandwidth to max() / 5 makes link congestion more likely to happen + const double noc_link_bandwidth = dist_2.max() / 5; + noc_ctx.noc_model.set_noc_link_bandwidth(noc_link_bandwidth); + // individual router parameters int curr_router_id; int router_grid_position_x; @@ -312,7 +325,9 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { int traffic_flow_priority = dist_1(rand_num_gen); // create and add the traffic flow - noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name, source_router_for_traffic_flow, sink_router_for_traffic_flow, traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority); + noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name, + source_router_for_traffic_flow, sink_router_for_traffic_flow, + traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority); number_of_created_traffic_flows++; @@ -330,11 +345,10 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { // now go and route all the traffic flows // // start by creating the routing algorithm - NocRouting* routing_algorithm_global = new XYRouting(); - noc_ctx.noc_flows_router = routing_algorithm_global; + noc_ctx.noc_flows_router = std::make_unique(); // create a local routing algorithm for the unit test - NocRouting* routing_algorithm = new XYRouting(); + auto routing_algorithm = std::make_unique(); // route all the traffic flows locally for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) { @@ -386,22 +400,15 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { // release the cost calculator datastructures free_noc_placement_structs(); - - // need to delete the local routing algorithm - delete routing_algorithm; } SECTION("test_comp_noc_latency_cost") { //initialize all the cost calculator datastructures allocate_and_load_noc_placement_structs(); - // create the noc options - t_noc_opts noc_opts; - noc_opts.noc_latency_constraints_weighting = dist_3(double_engine); - noc_opts.noc_latency_weighting = dist_3(double_engine); - - // create local variable to store the latency cost + // create local variable to store the latency cost terms double golden_total_noc_latency_costs = 0.; + double golden_total_noc_latency_overrun_costs = 0.; // local router and link latency parameters double router_latency = noc_ctx.noc_model.get_noc_router_latency(); @@ -412,26 +419,50 @@ TEST_CASE("test_initial_comp_cost_functions", "[noc_place_utils]") { const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number); double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_route_sizes[traffic_flow_number] + 1)) + (link_latency * golden_traffic_flow_route_sizes[traffic_flow_number]); + double curr_traffic_flow_latency_overrun = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.); // calculate the latency cost - double current_latency_cost = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency); - current_latency_cost *= curr_traffic_flow.traffic_flow_priority; + double current_latency_cost = curr_traffic_flow_latency * curr_traffic_flow.traffic_flow_priority; + 
double current_latency_overrun_cost = curr_traffic_flow_latency_overrun * curr_traffic_flow.traffic_flow_priority; golden_total_noc_latency_costs += current_latency_cost; + golden_total_noc_latency_overrun_costs += current_latency_overrun_cost; } - // run the test function and get the bandwidth calculated - double found_latency_cost = comp_noc_latency_cost(noc_opts); + // run the test function and get the latency cost calculated + auto [found_latency_cost, found_latency_overrun_cost] = comp_noc_latency_cost(); - // compare the test function bandwidth cost to the golden value + // compare the test function latency cost to the golden value // since we are comparing double numbers we allow a tolerance of difference REQUIRE(vtr::isclose(golden_total_noc_latency_costs, found_latency_cost)); + REQUIRE(vtr::isclose(golden_total_noc_latency_overrun_costs, found_latency_overrun_cost)); // release the cost calculator datastructures free_noc_placement_structs(); + } + + SECTION("test_comp_noc_congestion_cost") { + //initialize all the cost calculator datastructures + allocate_and_load_noc_placement_structs(); + + // create local variable to store the latency cost + double golden_total_noc_congestion_costs = 0.; + + for (const auto& link : noc_ctx.noc_model.get_noc_links()) { + double congested_bw_ratio = link.get_congested_bandwidth_ratio(); + + golden_total_noc_congestion_costs += congested_bw_ratio; + } + + // run the test function to get the congestion cost + double found_congestion_cost = comp_noc_congestion_cost(); - // need to delete the local routing algorithm - delete routing_algorithm; + // compare the test function congestion cost to the golden value + // since we are comparing double numbers we allow a tolerance of difference + REQUIRE(vtr::isclose(golden_total_noc_congestion_costs, found_congestion_cost)); + + // release the cost calculator datastructures + free_noc_placement_structs(); } } @@ -455,7 +486,6 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // start by deleting any global datastructures (this is so that we don't have corruption from previous tests) noc_ctx.noc_model.clear_noc(); noc_ctx.noc_traffic_flows_storage.clear_traffic_flows(); - delete noc_ctx.noc_flows_router; place_ctx.block_locs.clear(); // store the reference to device grid with @@ -472,13 +502,17 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ t_noc_opts noc_opts; noc_opts.noc_latency_constraints_weighting = dist_3(double_engine); noc_opts.noc_latency_weighting = dist_3(double_engine); + noc_opts.noc_congestion_weighting = dist_3(double_engine); // setting the NoC parameters noc_ctx.noc_model.set_noc_link_latency(1); noc_ctx.noc_model.set_noc_router_latency(1); + noc_ctx.noc_model.set_noc_link_bandwidth(1); + // needs to be the same as above double router_latency = noc_ctx.noc_model.get_noc_router_latency(); double link_latency = noc_ctx.noc_model.get_noc_link_latency(); + double link_bandwidth = noc_ctx.noc_model.get_noc_link_bandwidth(); // keeps track of which hard router each cluster block is placed vtr::vector router_where_cluster_is_placed; @@ -568,7 +602,9 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ int traffic_flow_priority = dist_1(rand_num_gen); // create and add the traffic flow - noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name, source_router_for_traffic_flow, sink_router_for_traffic_flow, traffic_flow_bandwidth_usage, 
traffic_flow_latency_constraint, traffic_flow_priority); + noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name, + source_router_for_traffic_flow, sink_router_for_traffic_flow, + traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority); number_of_created_traffic_flows++; @@ -581,11 +617,10 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // now go and route all the traffic flows // // start by creating the routing algorithm - NocRouting* routing_algorithm_global = new XYRouting(); - noc_ctx.noc_flows_router = routing_algorithm_global; + noc_ctx.noc_flows_router = std::make_unique(); // create a local routing algorithm for the unit test - NocRouting* routing_algorithm = new XYRouting(); + auto routing_algorithm = std::make_unique(); // store the traffic flow routes found vtr::vector> golden_traffic_flow_routes; @@ -593,12 +628,16 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // store the traffic flow bandwidth costs and latency costs vtr::vector golden_traffic_flow_bandwidth_costs; vtr::vector golden_traffic_flow_latency_costs; + vtr::vector golden_traffic_flow_latency_overrun_costs; golden_traffic_flow_bandwidth_costs.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows()); golden_traffic_flow_latency_costs.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows()); + golden_traffic_flow_latency_overrun_costs.resize(noc_ctx.noc_traffic_flows_storage.get_number_of_traffic_flows()); + // store link congestion costs + vtr::vector golden_link_congestion_costs; + golden_link_congestion_costs.resize(noc_ctx.noc_model.get_number_of_noc_links()); // stores the change in bandwidth and latency costs from the test function - double test_noc_bandwidth_costs = 0; - double test_noc_latency_costs = 0; + NocCostTerms test_noc_costs; // we need to route all the traffic flows based on their initial positions for (int traffic_flow_number = 0; traffic_flow_number < number_of_created_traffic_flows; traffic_flow_number++) { @@ -625,31 +664,43 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // also initialize the bandwidth and latency costs for all traffic flows // and sum them up to calculate the total initial aggregate bandwidth and latency costs for the NoC for (int traffic_flow_number = 0; traffic_flow_number < number_of_created_traffic_flows; traffic_flow_number++) { - const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number); + const auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number; + const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id); - for (auto& link : golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number]) { + for (auto& link : golden_traffic_flow_routes[traffic_flow_id]) { golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth; } // calculate the bandwidth cost - golden_traffic_flow_bandwidth_costs[(NocTrafficFlowId)traffic_flow_number] = golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number].size() * curr_traffic_flow.traffic_flow_bandwidth; - golden_traffic_flow_bandwidth_costs[(NocTrafficFlowId)traffic_flow_number] *= curr_traffic_flow.traffic_flow_priority; + golden_traffic_flow_bandwidth_costs[traffic_flow_id] = golden_traffic_flow_routes[traffic_flow_id].size() * 
curr_traffic_flow.traffic_flow_bandwidth; + golden_traffic_flow_bandwidth_costs[traffic_flow_id] *= curr_traffic_flow.traffic_flow_priority; - double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number].size() + 1)) + (link_latency * golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number].size()); + double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[traffic_flow_id].size() + 1)) + (link_latency * golden_traffic_flow_routes[traffic_flow_id].size()); - golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number] = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency); - golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number] *= curr_traffic_flow.traffic_flow_priority; + golden_traffic_flow_latency_costs[traffic_flow_id] = curr_traffic_flow_latency; + golden_traffic_flow_latency_overrun_costs[traffic_flow_id] = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.); + golden_traffic_flow_latency_costs[traffic_flow_id] *= curr_traffic_flow.traffic_flow_priority; + golden_traffic_flow_latency_overrun_costs[traffic_flow_id] *= curr_traffic_flow.traffic_flow_priority; + + test_noc_costs.aggregate_bandwidth += golden_traffic_flow_bandwidth_costs[traffic_flow_id]; + test_noc_costs.latency += golden_traffic_flow_latency_costs[traffic_flow_id]; + test_noc_costs.latency_overrun += golden_traffic_flow_latency_overrun_costs[traffic_flow_id]; + } - test_noc_bandwidth_costs += golden_traffic_flow_bandwidth_costs[(NocTrafficFlowId)traffic_flow_number]; - test_noc_latency_costs += golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number]; + // initialize golden congestion cost for all links + for (const auto& link : noc_ctx.noc_model.get_noc_links()) { + auto link_id = link.get_link_id(); + golden_link_congestion_costs[link_id] = std::max(golden_link_bandwidths[link_id] - link_bandwidth, 0.0); + test_noc_costs.congestion += golden_link_congestion_costs[link_id]; } // initialize noc placement structs allocate_and_load_noc_placement_structs(); - // We need to run these functions as they initialize local variables needed to run the test function within this unit test. we assume thi is correct + // We need to run these functions as they initialize local variables needed to run the test function within this unit test. 
we assume this is correct comp_noc_aggregate_bandwidth_cost(); - comp_noc_latency_cost(noc_opts); + comp_noc_latency_cost(); + comp_noc_congestion_cost(); // datastructure that keeps track of moved blocks during placement t_pl_blocks_to_be_moved blocks_affected(NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST); @@ -717,6 +768,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // go through the current traffic flow and reduce the bandwidths of the links for (auto& link : golden_traffic_flow_routes[traffic_flow]) { golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth; + golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0); } // re-route the traffic flow @@ -725,6 +777,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // go through the current traffic flow and increase the bandwidths of the links for (auto& link : golden_traffic_flow_routes[traffic_flow]) { golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth; + golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0); } // update the costs now @@ -733,8 +786,10 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[traffic_flow].size() + 1)) + (link_latency * golden_traffic_flow_routes[traffic_flow].size()); - golden_traffic_flow_latency_costs[traffic_flow] = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency); + golden_traffic_flow_latency_costs[traffic_flow] = curr_traffic_flow_latency; + golden_traffic_flow_latency_overrun_costs[traffic_flow] = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.); golden_traffic_flow_latency_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority; + golden_traffic_flow_latency_overrun_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority; routed_traffic_flows.insert(traffic_flow); } @@ -749,6 +804,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // go through the current traffic flow and reduce the bandwidths of the links for (auto& link : golden_traffic_flow_routes[traffic_flow]) { golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth; + golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0); } // re-route the traffic flow @@ -757,6 +813,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // go through the current traffic flow and increase the bandwidths of the links for (auto& link : golden_traffic_flow_routes[traffic_flow]) { golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth; + golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0); } // update the costs now @@ -765,22 +822,25 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[traffic_flow].size() + 1)) + (link_latency * golden_traffic_flow_routes[traffic_flow].size()); - golden_traffic_flow_latency_costs[traffic_flow] = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + 
(noc_opts.noc_latency_weighting * curr_traffic_flow_latency); + golden_traffic_flow_latency_costs[traffic_flow] = curr_traffic_flow_latency; + golden_traffic_flow_latency_overrun_costs[traffic_flow] = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.); golden_traffic_flow_latency_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority; + golden_traffic_flow_latency_overrun_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority; routed_traffic_flows.insert(traffic_flow); } } - double delta_aggr_band_cost = 0.; - double delta_laten_cost = 0.; + NocCostTerms delta_cost; // call the test function - find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, noc_opts); + find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost); - // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions - test_noc_bandwidth_costs += delta_aggr_band_cost; - test_noc_latency_costs += delta_laten_cost; + // update the test noc cost terms based on the cost changes found by the test functions + test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth; + test_noc_costs.latency += delta_cost.latency; + test_noc_costs.latency_overrun += delta_cost.latency_overrun; + test_noc_costs.congestion += delta_cost.congestion; // need this function to update the local datastructures that store all the traffic flow costs commit_noc_costs(); @@ -855,6 +915,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // go through the current traffic flow and reduce the bandwidths of the links for (auto& link : golden_traffic_flow_routes[traffic_flow]) { golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth; + golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0); } // re-route the traffic flow @@ -863,6 +924,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // go through the current traffic flow and increase the bandwidths of the links for (auto& link : golden_traffic_flow_routes[traffic_flow]) { golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth; + golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0); } // update the costs now @@ -871,8 +933,10 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[traffic_flow].size() + 1)) + (link_latency * golden_traffic_flow_routes[traffic_flow].size()); - golden_traffic_flow_latency_costs[traffic_flow] = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency); + golden_traffic_flow_latency_costs[traffic_flow] = curr_traffic_flow_latency; + golden_traffic_flow_latency_overrun_costs[traffic_flow] = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.); golden_traffic_flow_latency_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority; + golden_traffic_flow_latency_overrun_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority; } // this is for the second swapped block @@ -883,6 +947,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // go through the current traffic flow and reduce the bandwidths of the 
links for (auto& link : golden_traffic_flow_routes[traffic_flow]) { golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth; + golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0); } // re-route the traffic flow @@ -891,6 +956,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // go through the current traffic flow and increase the bandwidths of the links for (auto& link : golden_traffic_flow_routes[traffic_flow]) { golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth; + golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0); } // update the costs now @@ -899,19 +965,22 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[traffic_flow].size() + 1)) + (link_latency * golden_traffic_flow_routes[traffic_flow].size()); - golden_traffic_flow_latency_costs[traffic_flow] = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency); + golden_traffic_flow_latency_costs[traffic_flow] = curr_traffic_flow_latency; + golden_traffic_flow_latency_overrun_costs[traffic_flow] = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.); golden_traffic_flow_latency_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority; + golden_traffic_flow_latency_overrun_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority; } - double delta_aggr_band_cost = 0.; - double delta_laten_cost = 0.; + NocCostTerms delta_cost; // call the test function - find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, noc_opts); + find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost); - // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions - test_noc_bandwidth_costs += delta_aggr_band_cost; - test_noc_latency_costs += delta_laten_cost; + // update the test noc cost terms based on the cost changes found by the test functions + test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth; + test_noc_costs.latency += delta_cost.latency; + test_noc_costs.latency_overrun += delta_cost.latency_overrun; + test_noc_costs.congestion += delta_cost.congestion; // need this function to update the local datastructures that store all the traffic flow costs commit_noc_costs(); @@ -974,6 +1043,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // go through the current traffic flow and reduce the bandwidths of the links for (auto& link : golden_traffic_flow_routes[traffic_flow]) { golden_link_bandwidths[link] -= curr_traffic_flow.traffic_flow_bandwidth; + golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0); } // re-route the traffic flow @@ -982,6 +1052,7 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // go through the current traffic flow and increase the bandwidths of the links for (auto& link : golden_traffic_flow_routes[traffic_flow]) { golden_link_bandwidths[link] += curr_traffic_flow.traffic_flow_bandwidth; + golden_link_congestion_costs[link] = std::max(golden_link_bandwidths[link] - link_bandwidth, 0.0); } // update the costs now @@ -990,20 +1061,23 @@ 
TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[traffic_flow].size() + 1)) + (link_latency * golden_traffic_flow_routes[traffic_flow].size()); - golden_traffic_flow_latency_costs[traffic_flow] = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency); + golden_traffic_flow_latency_costs[traffic_flow] = curr_traffic_flow_latency; + golden_traffic_flow_latency_overrun_costs[traffic_flow] = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.); golden_traffic_flow_latency_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority; + golden_traffic_flow_latency_overrun_costs[traffic_flow] *= curr_traffic_flow.traffic_flow_priority; } // reset the delta costs - delta_aggr_band_cost = 0.; - delta_laten_cost = 0.; + delta_cost = NocCostTerms(); // call the test function - find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, noc_opts); + find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost); - // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions - test_noc_bandwidth_costs += delta_aggr_band_cost; - test_noc_latency_costs += delta_laten_cost; + // update the test noc cost terms based on the cost changes found by the test functions + test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth; + test_noc_costs.latency += delta_cost.latency; + test_noc_costs.latency_overrun += delta_cost.latency_overrun; + test_noc_costs.congestion += delta_cost.congestion; // need this function to update the local datastructures that store all the traffic flow costs commit_noc_costs(); @@ -1059,15 +1133,16 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ // we don't have to calculate the costs or update bandwidths because the swapped router blocks do not have any associated traffic flows // // reset the delta costs - delta_aggr_band_cost = 0.; - delta_laten_cost = 0.; + delta_cost = NocCostTerms(); // call the test function - find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_aggr_band_cost, delta_laten_cost, noc_opts); + find_affected_noc_routers_and_update_noc_costs(blocks_affected, delta_cost); - // update the test total noc bandwidth and latency costs based on the cost changes found by the test functions - test_noc_bandwidth_costs += delta_aggr_band_cost; - test_noc_latency_costs += delta_laten_cost; + // update the test noc cost terms based on the cost changes found by the test functions + test_noc_costs.aggregate_bandwidth += delta_cost.aggregate_bandwidth; + test_noc_costs.latency += delta_cost.latency; + test_noc_costs.latency_overrun += delta_cost.latency_overrun; + test_noc_costs.congestion += delta_cost.congestion; // need this function to update the local datastructures that store all the traffic flow costs commit_noc_costs(); @@ -1080,102 +1155,166 @@ TEST_CASE("test_find_affected_noc_routers_and_update_noc_costs, test_commit_noc_ for (int link_number = 0; link_number < number_of_links; link_number++) { NocLinkId current_link_id = (NocLinkId)link_number; const NocLink& current_link = noc_ctx.noc_model.get_single_noc_link(current_link_id); + double golden_link_bandwidth = golden_link_bandwidths[current_link_id]; + double 
golden_link_congested_bandwidth = std::max(golden_link_bandwidths[current_link_id] - link_bandwidth, 0.0); + double golden_link_congested_bandwidth_ratio = golden_link_congested_bandwidth / link_bandwidth; - REQUIRE(golden_link_bandwidths[current_link_id] == current_link.get_bandwidth_usage()); + REQUIRE(golden_link_bandwidth == current_link.get_bandwidth_usage()); + REQUIRE(golden_link_congested_bandwidth == current_link.get_congested_bandwidth()); + REQUIRE(golden_link_congested_bandwidth_ratio == current_link.get_congested_bandwidth_ratio()); } - // now find the total expected noc aggregate bandwidth and latency cost + // now find the total expected noc cost terms double golden_total_noc_aggr_bandwidth_cost = 0.; double golden_total_noc_latency_cost = 0.; + double golden_total_noc_latency_overrun_cost = 0.; + double golden_total_noc_congestion_cost = 0.; for (int traffic_flow_number = 0; traffic_flow_number < number_of_created_traffic_flows; traffic_flow_number++) { - golden_total_noc_aggr_bandwidth_cost += golden_traffic_flow_bandwidth_costs[(NocTrafficFlowId)traffic_flow_number]; - golden_total_noc_latency_cost += golden_traffic_flow_latency_costs[(NocTrafficFlowId)traffic_flow_number]; + const auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number; + golden_total_noc_aggr_bandwidth_cost += golden_traffic_flow_bandwidth_costs[traffic_flow_id]; + golden_total_noc_latency_cost += golden_traffic_flow_latency_costs[traffic_flow_id]; + golden_total_noc_latency_overrun_cost += golden_traffic_flow_latency_overrun_costs[traffic_flow_id]; } + golden_total_noc_congestion_cost = std::accumulate(golden_link_congestion_costs.begin(), golden_link_congestion_costs.end(), 0.0); + // now check whether the expected noc costs that we manually calculated above match the noc costs found through the test function (we allow for a tolerance of difference) - REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_latency_costs)); - REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_bandwidth_costs)); + REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth)); + REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency)); + REQUIRE(vtr::isclose(golden_total_noc_latency_overrun_cost, test_noc_costs.latency_overrun)); + REQUIRE(vtr::isclose(golden_total_noc_congestion_cost, test_noc_costs.congestion)); // now test the recompute cost function // // The recompute cost function just adds up all traffic flow costs, so it should match the expected noc costs that we manually calculated above by summing up all the expected individual traffic flow costs.
// // start by resetting the test cost variables - test_noc_bandwidth_costs = 0.; - test_noc_latency_costs = 0.; + test_noc_costs.aggregate_bandwidth = 0.; + test_noc_costs.latency = 0.; + test_noc_costs.latency_overrun = 0.; + test_noc_costs.congestion = 0.; // now execute the test function - recompute_noc_costs(test_noc_bandwidth_costs, test_noc_latency_costs); + recompute_noc_costs(test_noc_costs); // now verify - REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_latency_costs)); - REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_bandwidth_costs)); + REQUIRE(vtr::isclose(golden_total_noc_aggr_bandwidth_cost, test_noc_costs.aggregate_bandwidth)); + REQUIRE(vtr::isclose(golden_total_noc_latency_cost, test_noc_costs.latency)); + REQUIRE(vtr::isclose(golden_total_noc_latency_overrun_cost, test_noc_costs.latency_overrun)); + REQUIRE(vtr::isclose(golden_total_noc_congestion_cost, test_noc_costs.congestion)); // delete local datastructures free_noc_placement_structs(); - - // need to delete local noc routing algorithm - delete routing_algorithm; } + TEST_CASE("test_update_noc_normalization_factors", "[noc_place_utils]") { // creating local parameters needed for the test t_placer_costs costs; t_placer_opts placer_opts; SECTION("Test case where the bandwidth cost is 0") { - costs.noc_aggregate_bandwidth_cost = 0.; - costs.noc_latency_cost = 1.; + costs.noc_cost_terms.aggregate_bandwidth = 0.; + costs.noc_cost_terms.latency = 1.; + costs.noc_cost_terms.latency_overrun = 1.; + costs.noc_cost_terms.congestion = 1.; // run the test function update_noc_normalization_factors(costs); // verify the aggregate bandwidth normalized cost // this should not be +INF and instead trimmed - REQUIRE(costs.noc_aggregate_bandwidth_cost_norm == 1.0); + REQUIRE(costs.noc_cost_norm_factors.aggregate_bandwidth == 1.0); } SECTION("Test case where the latency cost is 0") { - costs.noc_aggregate_bandwidth_cost = 1.; - costs.noc_latency_cost = 0.; + costs.noc_cost_terms.aggregate_bandwidth = 1.; + costs.noc_cost_terms.latency = 0.; + costs.noc_cost_terms.latency_overrun = 1.; + costs.noc_cost_terms.congestion = 1.; // run the test function update_noc_normalization_factors(costs); // verify the latency normalized cost // this should not be +INF and instead trimmed - REQUIRE(costs.noc_latency_cost_norm == 1.e12); + REQUIRE(costs.noc_cost_norm_factors.latency == 1.e12); } SECTION("Test case where the bandwidth cost is an expected value") { - costs.noc_aggregate_bandwidth_cost = 1.e9; - costs.noc_latency_cost = 0.; + costs.noc_cost_terms.aggregate_bandwidth = 1.e9; + costs.noc_cost_terms.latency = 1.; + costs.noc_cost_terms.latency_overrun = 1.; + costs.noc_cost_terms.congestion = 1.; // run the test function update_noc_normalization_factors(costs); // verify the aggregate bandwidth normalized cost // this should not be trimmed - REQUIRE(costs.noc_aggregate_bandwidth_cost_norm == 1.e-9); + REQUIRE(costs.noc_cost_norm_factors.aggregate_bandwidth == 1.e-9); } SECTION("Test case where the latency cost is an expected value") { - costs.noc_aggregate_bandwidth_cost = 1.; - costs.noc_latency_cost = 50.e-12; + costs.noc_cost_terms.aggregate_bandwidth = 1.; + costs.noc_cost_terms.latency = 50.e-12; + costs.noc_cost_terms.latency_overrun = 1.; + costs.noc_cost_terms.congestion = 1.; // run the test function update_noc_normalization_factors(costs); // verify the latency normalized cost // this should not be trimmed - REQUIRE(costs.noc_latency_cost_norm == 2.e10); + REQUIRE(costs.noc_cost_norm_factors.latency 
== 2.e10);
     }
     SECTION("Test case where the latency cost is lower than the smallest expected value") {
-        costs.noc_aggregate_bandwidth_cost = 1.;
-        costs.noc_latency_cost = 999.e-15;
+        costs.noc_cost_terms.aggregate_bandwidth = 1.;
+        costs.noc_cost_terms.latency = 999.e-15;
+        costs.noc_cost_terms.latency_overrun = 1.;
+        costs.noc_cost_terms.congestion = 1.;
         // run the test function
         update_noc_normalization_factors(costs);
         // verify the latency normalized cost
         // this should not be trimmed
-        REQUIRE(costs.noc_latency_cost_norm == 1.e12);
+        REQUIRE(costs.noc_cost_norm_factors.latency == 1.e12);
+    }
+    SECTION("Test case where the congestion cost is zero") {
+        costs.noc_cost_terms.aggregate_bandwidth = 1.;
+        costs.noc_cost_terms.latency = 1.;
+        costs.noc_cost_terms.latency_overrun = 1.;
+        costs.noc_cost_terms.congestion = 0.;
+
+        // run the test function
+        update_noc_normalization_factors(costs);
+
+        // verify the congestion normalization factor
+        // this should not be infinite
+        REQUIRE(costs.noc_cost_norm_factors.congestion == 1.e3);
+    }
+    SECTION("Test case where the congestion cost is lower than the smallest expected value") {
+        costs.noc_cost_terms.aggregate_bandwidth = 1.;
+        costs.noc_cost_terms.latency = 1.;
+        costs.noc_cost_terms.latency_overrun = 1.;
+        costs.noc_cost_terms.congestion = 999.e-15;
+
+        // run the test function
+        update_noc_normalization_factors(costs);
+
+        // verify the congestion normalization factor
+        // this should not be infinite
+        REQUIRE(costs.noc_cost_norm_factors.congestion == 1.e3);
+    }
+    SECTION("Test case where the congestion cost is an expected value") {
+        costs.noc_cost_terms.aggregate_bandwidth = 1.;
+        costs.noc_cost_terms.latency = 1.;
+        costs.noc_cost_terms.latency_overrun = 1.;
+        costs.noc_cost_terms.congestion = 1.e2;
+
+        // run the test function
+        update_noc_normalization_factors(costs);
+
+        // verify the congestion normalization factor
+        REQUIRE(costs.noc_cost_norm_factors.congestion == 1.e-2);
     }
 }
 TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
@@ -1198,7 +1337,6 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
     // start by deleting any global datastructures (this is so that we don't have corruption from previous tests)
     noc_ctx.noc_model.clear_noc();
     noc_ctx.noc_traffic_flows_storage.clear_traffic_flows();
-    delete noc_ctx.noc_flows_router;
     place_ctx.block_locs.clear();
     // store the reference to device grid with
@@ -1215,10 +1353,12 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
     t_noc_opts noc_opts;
     noc_opts.noc_latency_constraints_weighting = dist_3(double_engine);
     noc_opts.noc_latency_weighting = dist_3(double_engine);
+    noc_opts.noc_congestion_weighting = dist_3(double_engine);
     // setting the NoC parameters
     noc_ctx.noc_model.set_noc_link_latency(1);
     noc_ctx.noc_model.set_noc_router_latency(1);
+    noc_ctx.noc_model.set_noc_link_bandwidth(1);
     // keeps track of which hard router each cluster block is placed
     vtr::vector<ClusterBlockId, NocRouterId> router_where_cluster_is_placed;
@@ -1306,7 +1446,9 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
         int traffic_flow_priority = dist_1(rand_num_gen);
         // create and add the traffic flow
-        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name, source_router_for_traffic_flow, sink_router_for_traffic_flow, traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority);
+        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name,
+                                                                   source_router_for_traffic_flow, sink_router_for_traffic_flow,
+                                                                   traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority);
         number_of_created_traffic_flows++;
@@ -1320,11 +1462,10 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
     // now go and route all the traffic flows //
     // start by creating the routing algorithm
-    NocRouting* routing_algorithm_global = new XYRouting();
-    noc_ctx.noc_flows_router = routing_algorithm_global;
+    noc_ctx.noc_flows_router = std::make_unique<XYRouting>();
     // create a local routing algorithm for the unit test
-    NocRouting* routing_algorithm = new XYRouting();
+    auto routing_algorithm = std::make_unique<XYRouting>();
     // store the traffic flow routes found
     vtr::vector<NocTrafficFlowId, std::vector<NocLinkId>> golden_traffic_flow_routes;
@@ -1342,6 +1483,8 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
         routing_algorithm->route_flow((NocRouterId)source_hard_router_id, (NocRouterId)sink_hard_routed_id, golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number], noc_ctx.noc_model);
     }
+    const vtr::vector<NocTrafficFlowId, std::vector<NocLinkId>> initial_golden_traffic_flow_routes = golden_traffic_flow_routes;
+
     // assume this works
     // this is needed to set up the global noc packet router and also global datastructures
     initial_noc_routing();
@@ -1471,7 +1614,7 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
         }
         // re-route the traffic flow
-        noc_ctx.noc_flows_router->route_flow(router_where_cluster_is_placed[curr_traffic_flow.source_router_cluster_id], router_where_cluster_is_placed[curr_traffic_flow.sink_router_cluster_id], golden_traffic_flow_routes[traffic_flow], noc_ctx.noc_model);
+        noc_ctx.noc_flows_router->route_flow(router_where_cluster_is_placed[curr_traffic_flow.source_router_cluster_id],router_where_cluster_is_placed[curr_traffic_flow.sink_router_cluster_id], golden_traffic_flow_routes[traffic_flow], noc_ctx.noc_model);
         // go through the current traffic flow and reduce the bandwidths of the links (we only update this in the NoC, since these changes should be rectified by the test function)
         // This shouldn't be updated in the golden bandwidths since we are imitating a swap of blocks and not having a real swap of blocks
@@ -1499,9 +1642,15 @@ TEST_CASE("test_revert_noc_traffic_flow_routes", "[noc_place_utils]") {
         const NocLink& current_link = noc_ctx.noc_model.get_single_noc_link(current_link_id);
         REQUIRE(golden_link_bandwidths[current_link_id] == current_link.get_bandwidth_usage());
+    }
-    delete routing_algorithm;
+    for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) {
+        auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number;
+        const auto& traffic_flow_route = noc_ctx.noc_traffic_flows_storage.get_traffic_flow_route(traffic_flow_id);
+        const auto& golden_traffic_flow_route = initial_golden_traffic_flow_routes[traffic_flow_id];
+        REQUIRE(traffic_flow_route == golden_traffic_flow_route);
+    }
 }
 TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
     // setup random number generation
@@ -1523,7 +1672,6 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
     // start by deleting any global datastructures (this is so that we don't have corruption from previous tests)
     noc_ctx.noc_model.clear_noc();
     noc_ctx.noc_traffic_flows_storage.clear_traffic_flows();
-    delete noc_ctx.noc_flows_router;
     place_ctx.block_locs.clear();
     // store the reference to device grid with
@@ -1536,23 +1684,21 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
     int router_grid_position_y;
     // setting the NoC parameters
-    noc_ctx.noc_model.set_noc_link_latency(1);
-    noc_ctx.noc_model.set_noc_router_latency(1);
-
-    double link_latency = 1;
-    double router_latency = 1;
+    const double link_latency = 1.0;
+    const double router_latency = 1.0;
+    const double link_bandwidth = 1.0;
+    noc_ctx.noc_model.set_noc_link_latency(link_latency);
+    noc_ctx.noc_model.set_noc_router_latency(router_latency);
+    noc_ctx.noc_model.set_noc_link_bandwidth(link_bandwidth);
     // noc options used in this test
     // we create these randomly
     t_noc_opts noc_opts;
     noc_opts.noc_latency_constraints_weighting = dist_3(double_engine);
     noc_opts.noc_latency_weighting = dist_3(double_engine);
+    noc_opts.noc_congestion_weighting = dist_3(double_engine);
     noc_opts.noc_routing_algorithm = "xy_routing";
-    // setting the NoC parameters
-    noc_ctx.noc_model.set_noc_link_latency(1);
-    noc_ctx.noc_model.set_noc_router_latency(1);
-
     // keeps track of which hard router each cluster block is placed
     vtr::vector<ClusterBlockId, NocRouterId> router_where_cluster_is_placed;
@@ -1595,6 +1741,11 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
         }
     }
+    // initialize NoC link bandwidth usage
+    for (auto& noc_link : noc_ctx.noc_model.get_mutable_noc_links()) {
+        noc_link.set_bandwidth_usage(0.0);
+    }
+
     // now we need to create router cluster blocks and passing them to placed at a router hard block as an initial position
     for (int cluster_block_number = 0; cluster_block_number < NUM_OF_LOGICAL_ROUTER_BLOCKS_NOC_PLACE_UTILS_TEST; cluster_block_number++) {
         // since the indexes for the hard router blocks start from 0, we will just place the router clusters on hard router blocks with the same id //
@@ -1639,7 +1790,9 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
         int traffic_flow_priority = dist_1(rand_num_gen);
         // create and add the traffic flow
-        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name, source_router_for_traffic_flow, sink_router_for_traffic_flow, traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority);
+        noc_ctx.noc_traffic_flows_storage.create_noc_traffic_flow(source_traffic_flow_name, sink_traffic_flow_name,
+                                                                   source_router_for_traffic_flow, sink_router_for_traffic_flow,
+                                                                   traffic_flow_bandwidth_usage, traffic_flow_latency_constraint, traffic_flow_priority);
         number_of_created_traffic_flows++;
@@ -1653,11 +1806,10 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
     // now go and route all the traffic flows //
     // start by creating the routing algorithm
-    NocRouting* routing_algorithm_global = new XYRouting();
-    noc_ctx.noc_flows_router = routing_algorithm_global;
+    noc_ctx.noc_flows_router = std::make_unique<XYRouting>();
     // create a local routing algorithm for the unit test
-    NocRouting* routing_algorithm = new XYRouting();
+    auto routing_algorithm = std::make_unique<XYRouting>();
     // store the traffic flow routes found
     vtr::vector<NocTrafficFlowId, std::vector<NocLinkId>> golden_traffic_flow_routes;
@@ -1665,38 +1817,68 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
     // we need to route all the traffic flows based on their initial positions
     for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) {
-        const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number);
+        const auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number;
+        const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
         // get the source and sink routers of this traffic flow
         int source_hard_router_id = (size_t)curr_traffic_flow.source_router_cluster_id;
         int sink_hard_routed_id = (size_t)curr_traffic_flow.sink_router_cluster_id;
+        // get the current traffic flow route
+        auto& traffic_flow_route = golden_traffic_flow_routes[traffic_flow_id];
+        double traffic_flow_bandwidth = curr_traffic_flow.traffic_flow_bandwidth;
+
         // route it
-        routing_algorithm->route_flow((NocRouterId)source_hard_router_id, (NocRouterId)sink_hard_routed_id, golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number], noc_ctx.noc_model);
+        routing_algorithm->route_flow((NocRouterId)source_hard_router_id, (NocRouterId)sink_hard_routed_id, traffic_flow_route, noc_ctx.noc_model);
+
+        // update link bandwidth utilization
+        for (auto link_id : traffic_flow_route) {
+            auto& noc_link = noc_ctx.noc_model.get_single_mutable_noc_link(link_id);
+            double curr_link_bw_util = noc_link.get_bandwidth_usage();
+            curr_link_bw_util += traffic_flow_bandwidth;
+            noc_link.set_bandwidth_usage(curr_link_bw_util);
+        }
     }
-    // variables below store the expected noc costs (latency and bandwidth)
+    // variables below store the expected noc cost terms
     t_placer_costs costs;
-    costs.noc_aggregate_bandwidth_cost = 0.;
-    costs.noc_latency_cost = 0.;
+    costs.noc_cost_terms.aggregate_bandwidth = 0.;
+    costs.noc_cost_terms.latency = 0.;
+    costs.noc_cost_terms.latency_overrun = 0.;
+    costs.noc_cost_terms.congestion = 0.;
     for (int traffic_flow_number = 0; traffic_flow_number < NUM_OF_TRAFFIC_FLOWS_NOC_PLACE_UTILS_TEST; traffic_flow_number++) {
-        const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow((NocTrafficFlowId)traffic_flow_number);
+        const auto traffic_flow_id = (NocTrafficFlowId)traffic_flow_number;
+        const t_noc_traffic_flow& curr_traffic_flow = noc_ctx.noc_traffic_flows_storage.get_single_noc_traffic_flow(traffic_flow_id);
         double curr_bandwidth_cost = 0.;
         double curr_latency_cost = 0.;
+        double curr_latency_overrun_cost = 0.;
+
+        // get the traffic flow route
+        const auto& golden_traffic_flow_route = golden_traffic_flow_routes[traffic_flow_id];
         // calculate the bandwidth cost
-        curr_bandwidth_cost = golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number].size() * curr_traffic_flow.traffic_flow_bandwidth;
+        curr_bandwidth_cost = golden_traffic_flow_route.size() * curr_traffic_flow.traffic_flow_bandwidth;
         curr_bandwidth_cost *= curr_traffic_flow.traffic_flow_priority;
-        double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number].size() + 1)) + (link_latency * golden_traffic_flow_routes[(NocTrafficFlowId)traffic_flow_number].size());
+        double curr_traffic_flow_latency = (router_latency * (golden_traffic_flow_route.size() + 1)) + (link_latency * golden_traffic_flow_route.size());
-        curr_latency_cost = (noc_opts.noc_latency_constraints_weighting * (std::max(0., curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency))) + (noc_opts.noc_latency_weighting * curr_traffic_flow_latency);
+        // calculate the latency cost
+        curr_latency_cost = curr_traffic_flow_latency;
+        curr_latency_overrun_cost = std::max(curr_traffic_flow_latency - curr_traffic_flow.max_traffic_flow_latency, 0.);
         curr_latency_cost *= curr_traffic_flow.traffic_flow_priority;
+        curr_latency_overrun_cost *= curr_traffic_flow.traffic_flow_priority;
-        costs.noc_aggregate_bandwidth_cost += curr_bandwidth_cost;
-        costs.noc_latency_cost += curr_latency_cost;
+        costs.noc_cost_terms.aggregate_bandwidth += curr_bandwidth_cost;
+        costs.noc_cost_terms.latency += curr_latency_cost;
+        costs.noc_cost_terms.latency_overrun += curr_latency_overrun_cost;
+    }
+
+    // calculate the congestion cost
+    for (const auto& noc_link : noc_ctx.noc_model.get_noc_links()) {
+        double curr_congestion_cost = noc_link.get_congested_bandwidth_ratio();
+        costs.noc_cost_terms.congestion += curr_congestion_cost;
     }
     // this defines the error tolerance that is allowed between the golden noc costs and the costs found by the test function: check_noc_placement_costs
@@ -1712,16 +1894,25 @@ TEST_CASE("test_check_noc_placement_costs", "[noc_place_utils]") {
     }
     SECTION("Case where the check place fails for both NoC costs") {
         // we need to make the aggregate bandwidth cost and latency cost be a value that is larger or smaller than the tolerance value
-        costs.noc_aggregate_bandwidth_cost += (costs.noc_aggregate_bandwidth_cost * error_tolerance * 2);
-        costs.noc_latency_cost -= (costs.noc_latency_cost * error_tolerance * 2);
+        costs.noc_cost_terms.aggregate_bandwidth += (costs.noc_cost_terms.aggregate_bandwidth * error_tolerance * 2);
+        costs.noc_cost_terms.latency -= (costs.noc_cost_terms.latency * error_tolerance * 2);
+        if (costs.noc_cost_terms.latency_overrun == 0) {
+            costs.noc_cost_terms.latency_overrun += MIN_EXPECTED_NOC_LATENCY_COST * error_tolerance * 2;
+        } else {
+            costs.noc_cost_terms.latency_overrun += costs.noc_cost_terms.latency_overrun * error_tolerance * 2;
+        }
+
+        if (costs.noc_cost_terms.congestion == 0) {
+            costs.noc_cost_terms.congestion += MIN_EXPECTED_NOC_CONGESTION_COST * error_tolerance * 2;
+        } else {
+            costs.noc_cost_terms.congestion += costs.noc_cost_terms.congestion * error_tolerance * 2;
+        }
         // run the test function
         int error = check_noc_placement_costs(costs, error_tolerance, noc_opts);
-        // we expect error to be 2 here, meaning the found costs are not within the tolerance range
-        REQUIRE(error == 2);
+        // we expect error to be 4 here, meaning the found costs are not within the tolerance range
+        REQUIRE(error == 4);
     }
-    // need to delete local noc routing algorithm
-    delete routing_algorithm;
 }
 } // namespace
diff --git a/vpr/test/test_noc_storage.cpp b/vpr/test/test_noc_storage.cpp
index 1cdc57550ad..a1255d31930 100644
--- a/vpr/test/test_noc_storage.cpp
+++ b/vpr/test/test_noc_storage.cpp
@@ -155,6 +155,8 @@ TEST_CASE("test_add_link", "[vpr_noc]") {
     // allocate the size for outgoing link vector for each router
     test_noc.make_room_for_noc_router_link_list();
+    // incremental counter used as NocLinkId
+    int noc_link_id_counter = 0;
     for (int source_router_id = 0; source_router_id < NUM_OF_ROUTERS; source_router_id++) {
         source = (NocRouterId)source_router_id;
@@ -164,8 +166,12 @@ TEST_CASE("test_add_link", "[vpr_noc]") {
             // makes sure we do not create a link for a router who acts as a sink and source
            if (source_router_id != sink_router_id) {
+                // converting the counter to link index
+                link_id = (NocLinkId)noc_link_id_counter;
+                noc_link_id_counter++;
+
                // add link to the golden reference
-                golden_set.emplace_back(source, sink);
+                golden_set.emplace_back(link_id, source, sink, 0.0);
                // add the link to the NoC
                test_noc.add_link(source, sink);
diff --git a/vpr/test/test_xy_routing.cpp b/vpr/test/test_xy_routing.cpp
index 67517271f43..49b58662ca2 100644
--- a/vpr/test/test_xy_routing.cpp
+++ b/vpr/test/test_xy_routing.cpp
@@ -109,7 +109,9 @@ TEST_CASE("test_route_flow", "[vpr_noc_xy_routing]") {
     std::vector<NocLink> golden_path;
     for (int current_router = 7; current_router != 4; current_router--) {
-        golden_path.emplace_back(NocLink(NocRouterId(current_router), NocRouterId(current_router - 1)));
+        NocLinkId link_id = noc_model.get_single_noc_link_id(NocRouterId(current_router), NocRouterId(current_router - 1));
+        const auto& link = noc_model.get_single_noc_link(link_id);
+        golden_path.push_back(link);
     }
     // store the route found by the algorithm
@@ -131,7 +133,9 @@ TEST_CASE("test_route_flow", "[vpr_noc_xy_routing]") {
     std::vector<NocLink> golden_path;
     for (int current_row = 0; current_row < 3; current_row++) {
-        golden_path.emplace_back(NocLink(NocRouterId(current_row * 4 + 2), NocRouterId((current_row + 1) * 4 + 2)));
+        NocLinkId link_id = noc_model.get_single_noc_link_id(NocRouterId(current_row * 4 + 2), NocRouterId((current_row + 1) * 4 + 2));
+        const auto& link = noc_model.get_single_noc_link(link_id);
+        golden_path.push_back(link);
     }
     // store the route found by the algorithm
@@ -154,12 +158,16 @@ TEST_CASE("test_route_flow", "[vpr_noc_xy_routing]") {
     // generate the horizontal path first
     for (int current_router = 3; current_router != 0; current_router--) {
-        golden_path.emplace_back(NocLink(NocRouterId(current_router), NocRouterId(current_router - 1)));
+        NocLinkId link_id = noc_model.get_single_noc_link_id(NocRouterId(current_router), NocRouterId(current_router - 1));
+        const auto& link = noc_model.get_single_noc_link(link_id);
+        golden_path.push_back(link);
     }
     // generate the vertical path next
     for (int current_row = 0; current_row < 3; current_row++) {
-        golden_path.emplace_back(NocLink(NocRouterId(current_row * 4), NocRouterId((current_row + 1) * 4)));
+        NocLinkId link_id = noc_model.get_single_noc_link_id(NocRouterId(current_row * 4), NocRouterId((current_row + 1) * 4));
+        const auto& link = noc_model.get_single_noc_link(link_id);
+        golden_path.push_back(link);
     }
     // store the route found by the algorithm
@@ -185,12 +193,16 @@ TEST_CASE("test_route_flow", "[vpr_noc_xy_routing]") {
     // generate the horizontal path first
     for (int current_router = 12; current_router != 15; current_router++) {
-        golden_path.emplace_back(NocLink(NocRouterId(current_router), NocRouterId(current_router + 1)));
+        NocLinkId link_id = noc_model.get_single_noc_link_id(NocRouterId(current_router), NocRouterId(current_router + 1));
+        const auto& link = noc_model.get_single_noc_link(link_id);
+        golden_path.push_back(link);
     }
     // generate the vertical path next
     for (int current_row = 3; current_row > 0; current_row--) {
-        golden_path.emplace_back(NocLink(NocRouterId(current_row * 4 + 3), NocRouterId((current_row - 1) * 4 + 3)));
+        NocLinkId link_id = noc_model.get_single_noc_link_id(NocRouterId(current_row * 4 + 3), NocRouterId((current_row - 1) * 4 + 3));
+        const auto& link = noc_model.get_single_noc_link(link_id);
+        golden_path.push_back(link);
     }
     // store the route found by the algorithm
diff --git a/vtr_flow/parse/parse_config/vpr_noc.txt b/vtr_flow/parse/parse_config/vpr_noc.txt
index f9a9a4440ac..51b7c194712 100644
--- a/vtr_flow/parse/parse_config/vpr_noc.txt
+++ b/vtr_flow/parse/parse_config/vpr_noc.txt
@@ -11,6 +11,10 @@
 %include "timing/vpr.route_min_chan_width.txt"
 %include "timing/vpr.route_relaxed_chan_width.txt"
-NoC_agg_bandwidth;vpr.out;NoC Placement Costs. noc_aggregate_bandwidth_cost: (.*), noc_latency_cost: .*, noc_latency_constraints_cost: .*,
-NoC_latency;vpr.out;NoC Placement Costs. noc_aggregate_bandwidth_cost: .*, noc_latency_cost: (.*), noc_latency_constraints_cost: .*,
-NoC_latency_constraints_cost;vpr.out;NoC Placement Costs. noc_aggregate_bandwidth_cost: .*, noc_latency_cost: .*, noc_latency_constraints_cost: (.*),
\ No newline at end of file
+NoC_agg_bandwidth;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: (.*), latency_cost: .*, n_met_latency_constraints: .*, latency_overrun_cost: .*, congestion_cost: .*, accum_congested_ratio: .*, n_congested_links: .*
+NoC_latency;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: .*, latency_cost: (.*), n_met_latency_constraints: .*, latency_overrun_cost: .*, congestion_cost: .*, accum_congested_ratio: .*, n_congested_links: .*
+NoC_n_met_latency_constraints;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: .*, latency_cost: .*, n_met_latency_constraints: (.*), latency_overrun_cost: .*, congestion_cost: .*, accum_congested_ratio: .*, n_congested_links: .*
+NoC_latency_overrun;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: .*, latency_cost: .*, n_met_latency_constraints: .*, latency_overrun_cost: (.*), congestion_cost: .*, accum_congested_ratio: .*, n_congested_links: .*
+NoC_congested_bw;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: .*, latency_cost: .*, n_met_latency_constraints: .*, latency_overrun_cost: .*, congestion_cost: (.*), accum_congested_ratio: .*, n_congested_links: .*
+NoC_congestion_ratio;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: .*, latency_cost: .*, n_met_latency_constraints: .*, latency_overrun_cost: .*, congestion_cost: .*, accum_congested_ratio: (.*), n_congested_links: .*
+NoC_n_congested_links;vpr.out;NoC Placement Costs. cost: .*, aggregate_bandwidth_cost: .*, latency_cost: .*, n_met_latency_constraints: .*, latency_overrun_cost: .*, congestion_cost: .*, accum_congested_ratio: .*, n_congested_links: (.*)
diff --git a/vtr_flow/scripts/python_libs/vtr/util.py b/vtr_flow/scripts/python_libs/vtr/util.py
index 14d7e519ede..61b52ee991b 100644
--- a/vtr_flow/scripts/python_libs/vtr/util.py
+++ b/vtr_flow/scripts/python_libs/vtr/util.py
@@ -8,6 +8,7 @@
 import subprocess
 import argparse
 import csv
+import os
 from collections import OrderedDict
 from pathlib import PurePath
@@ -145,12 +146,23 @@ def run_system_command(
         try:
             # Call the command
             stderr = None if self._valgrind else subprocess.STDOUT
+
+            # capnproto accesses PWD environment variable to learn about
+            # the current working directory. However, subprocess.Popen()
+            # changes the working directory without updating this variable.
+            # This can cause issues when a VTR task passes router lookahead
+            # or RR graph files to VPR. PWD environment variable is updated
+            # manually to prevent capnproto from throwing exceptions.
+            modified_environ = os.environ.copy()
+            modified_environ['PWD'] = str(temp_dir)
+
             proc = subprocess.Popen(
                 cmd,
                 stdout=subprocess.PIPE,  # We grab stdout
                 stderr=stderr,  # stderr redirected to stderr
                 universal_newlines=True,  # Lines always end in \n
                 cwd=str(temp_dir),  # Where to run the command
+                env=modified_environ
             )
             # Read the output line-by-line and log it
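Note on the normalization expectations in test_noc_place_utils.cpp: the values the new sections require (a congestion normalization factor of 1.e3 when the congestion cost is zero or below the smallest expected value, 1.e-2 when it is 1.e2, and a latency factor capped at 1.e12) are consistent with a clamped-inverse normalization. The sketch below only illustrates that relationship as implied by the tests; the constant names and the exact clamping inside update_noc_normalization_factors() are assumptions, not code copied from noc_place_utils.cpp.

    // Illustrative sketch (assumed behaviour, inferred from the unit-test expectations):
    // each normalization factor is the inverse of its cost term, capped so that a zero
    // or vanishingly small cost cannot produce an infinite or runaway factor.
    #include <algorithm>

    // Assumed caps: 1/1e-3 = 1.e3 for congestion, 1.e12 for latency.
    constexpr double MAX_INV_NOC_CONGESTION_COST = 1.e3;
    constexpr double MAX_INV_NOC_LATENCY_COST = 1.e12;

    double clamped_inverse(double cost, double max_inv) {
        if (cost <= 0.0) {
            return max_inv; // avoid dividing by zero
        }
        return std::min(1.0 / cost, max_inv);
    }

    // clamped_inverse(0.,       MAX_INV_NOC_CONGESTION_COST) == 1.e3
    // clamped_inverse(999.e-15, MAX_INV_NOC_CONGESTION_COST) == 1.e3
    // clamped_inverse(1.e2,     MAX_INV_NOC_CONGESTION_COST) == 1.e-2
    // clamped_inverse(999.e-15, MAX_INV_NOC_LATENCY_COST)    == 1.e12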
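The change from REQUIRE(error == 2) to REQUIRE(error == 4) reflects that check_noc_placement_costs() now reports one mismatch per NoC cost term (aggregate bandwidth, latency, latency overrun and congestion), and the failing-case section perturbs all four terms beyond error_tolerance. A minimal sketch of a per-term relative-error check of this kind, with every name other than those appearing in the diff treated as hypothetical:

    // Hedged illustration only; the real comparison lives in check_noc_placement_costs().
    #include <algorithm>
    #include <cmath>
    #include <vector>

    int count_terms_outside_tolerance(const std::vector<double>& found,
                                      const std::vector<double>& golden,
                                      double error_tolerance) {
        int error = 0;
        for (std::size_t i = 0; i < found.size(); i++) {
            // Guard against a zero golden value before forming the relative difference.
            double reference = std::max(std::fabs(golden[i]), 1e-30);
            if (std::fabs(found[i] - golden[i]) / reference > error_tolerance) {
                error++; // one error per cost term outside the tolerance
            }
        }
        return error;
    }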