diff --git a/vpr/src/route/parallel_connection_router.h b/vpr/src/route/parallel_connection_router.h index 1ec257dc1c..8f7db3a5b4 100644 --- a/vpr/src/route/parallel_connection_router.h +++ b/vpr/src/route/parallel_connection_router.h @@ -218,13 +218,29 @@ class ParallelConnectionRouter : public ConnectionRoutersub_threads_.resize(multi_queue_num_threads - 1); for (int i = 0; i < multi_queue_num_threads - 1; ++i) { this->sub_threads_[i] = std::thread(&ParallelConnectionRouter::timing_driven_find_single_shortest_path_from_heap_sub_thread_wrapper, this, i + 1 /*0: main thread*/); - this->sub_threads_[i].detach(); } } ~ParallelConnectionRouter() { this->is_router_destroying_ = true; // signal the helper threads to exit this->thread_barrier_.wait(); // wait until all threads reach the barrier + for (auto& sub_thread : this->sub_threads_) { + VTR_ASSERT(sub_thread.joinable()); + // Wait for all helper threads to terminate. + // + // IMPORTANT: This must be done before the main thread destroys this object; + // otherwise, helper threads might access already-destroyed data members, leading + // to undefined behavior. For example, due to timing between threads, after both + // the main and helper threads hit the barrier, the main thread may complete + // object destruction/cleanup before the helper threads check whether + // `this->is_router_destroying_ == true` in the `..._sub_thread_wrapper` function. + // The helper threads may then see `this->is_router_destroying_` as false and fail + // to exit their thread functions. This results in helper threads remaining alive + // and accessing invalid memory addresses, leading to segfaults (please refer to + // https://github.com/verilog-to-routing/vtr-verilog-to-routing/issues/3029 for + // details). + sub_thread.join(); + } VTR_LOG("Parallel Connection Router is being destroyed. 
Time spent on path search: %.3f seconds.\n", std::chrono::duration(this->path_search_cumulative_time).count()); diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/config.txt index 6e72a74148..46e014cf1e 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/config.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/config.txt @@ -28,6 +28,6 @@ script_params_common=-starting_stage vpr -sdc_file tasks/regression_tests/vtr_re script_params_list_add = script_params_list_add = --route_chan_width 30 -check_incremental_sta_consistency script_params_list_add = --router_algorithm parallel --num_workers 4 -# script_params_list_add = --enable_parallel_connection_router on --astar_fac 0.0 --post_target_prune_fac 0.0 --post_target_prune_offset 0.0 -# script_params_list_add = --enable_parallel_connection_router on --multi_queue_num_threads 2 --multi_queue_num_queues 4 --astar_fac 0.0 --post_target_prune_fac 0.0 --post_target_prune_offset 0.0 -# script_params_list_add = --enable_parallel_connection_router on --multi_queue_num_threads 2 --multi_queue_num_queues 8 --multi_queue_direct_draining on --astar_fac 0.0 --post_target_prune_fac 0.0 --post_target_prune_offset 0.0 +script_params_list_add = --enable_parallel_connection_router on --astar_fac 0.0 --post_target_prune_fac 0.0 --post_target_prune_offset 0.0 +script_params_list_add = --enable_parallel_connection_router on --multi_queue_num_threads 2 --multi_queue_num_queues 4 --astar_fac 0.0 --post_target_prune_fac 0.0 --post_target_prune_offset 0.0 +script_params_list_add = --enable_parallel_connection_router on --multi_queue_num_threads 2 --multi_queue_num_queues 8 --multi_queue_direct_draining on --astar_fac 0.0 --post_target_prune_fac 0.0 --post_target_prune_offset 0.0 diff --git 
a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/golden_results.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/golden_results.txt index 86cec1613d..eaeb5d795f 100644 --- a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/golden_results.txt +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_multiclock/config/golden_results.txt @@ -2,3 +2,6 @@ k6_frac_N10_mem32K_40nm.xml multiclock.blif common 1.59919 0.595 0.841581 -1 -1 0.57 0.814813 -1 1.59919 -1 1.1662 -1 1.8371 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0.243 1.71958 -1 -1 0.268 3.24281 -1 1.44782 -1 3.4042 -1 -1.40928 -1 -1 -1 -1 k6_frac_N10_mem32K_40nm.xml multiclock.blif common_--route_chan_width_30_-check_incremental_sta_consistency 1.3344 0.595 0.781297 -1 -1 0.57 0.757256 -1 1.3344 -1 1.16524 -1 1.77873 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0.243 1.6593 -1 -1 0.268 3.18526 -1 1.18303 -1 3.40324 -1 -1.46764 -1 -1 -1 -1 k6_frac_N10_mem32K_40nm.xml multiclock.blif common_--router_algorithm_parallel_--num_workers_4 1.59919 0.595 0.841581 -1 -1 0.57 0.814813 -1 1.59919 -1 1.14847 -1 1.95678 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0.243 1.71958 -1 -1 0.268 3.24281 -1 1.44782 -1 3.38647 -1 -1.28959 -1 -1 -1 -1 + k6_frac_N10_mem32K_40nm.xml multiclock.blif common_--enable_parallel_connection_router_on_--astar_fac_0.0_--post_target_prune_fac_0.0_--post_target_prune_offset_0.0 1.59919 0.595 0.841581 -1 -1 0.57 0.814813 -1 1.59919 -1 1.14847 -1 1.95678 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0.243 1.71958 -1 -1 0.268 3.24281 -1 1.44782 -1 3.38647 -1 -1.28959 -1 -1 -1 -1 + k6_frac_N10_mem32K_40nm.xml multiclock.blif common_--enable_parallel_connection_router_on_--multi_queue_num_threads_2_--multi_queue_num_queues_4_--astar_fac_0.0_--post_target_prune_fac_0.0_--post_target_prune_offset_0.0 1.59919 0.595 0.841581 -1 -1 0.57 0.814813 -1 1.59919 -1 1.14847 
-1 1.95678 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0.243 1.71958 -1 -1 0.268 3.24281 -1 1.44782 -1 3.38647 -1 -1.28959 -1 -1 -1 -1 + k6_frac_N10_mem32K_40nm.xml multiclock.blif common_--enable_parallel_connection_router_on_--multi_queue_num_threads_2_--multi_queue_num_queues_8_--multi_queue_direct_draining_on_--astar_fac_0.0_--post_target_prune_fac_0.0_--post_target_prune_offset_0.0 1.59919 0.595 0.841581 -1 -1 0.57 0.814813 -1 1.59919 -1 1.14847 -1 1.95678 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 0.243 1.71958 -1 -1 0.268 3.24281 -1 1.44782 -1 3.38647 -1 -1.28959 -1 -1 -1 -1