diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index dc1b48f28cc..1e5584ab1b6 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -1,6 +1,9 @@ --- name: Bug report about: Create a report to help us improve +title: '' +labels: '' +assignees: '' --- diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 96a3a6f322e..7523b9efbf8 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -1,6 +1,9 @@ --- name: Feature request about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' --- diff --git a/.github/ISSUE_TEMPLATE/vtr-change.md b/.github/ISSUE_TEMPLATE/vtr-change.md new file mode 100644 index 00000000000..74d5ec9e8f6 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/vtr-change.md @@ -0,0 +1,25 @@ +--- +name: VTR change +about: Describe purpose and lifecycle of a local change we made to VTR +title: '' +labels: '' +assignees: '' + +--- + +### Why did we need this? (what does this change enable us to do) + + +### What did it change? + + +### Should it be merged upstream - if not, when can we delete it? + +### What is needed to get this merged / deleted? + +* [ ] is the implementation work to make suitable for merging / deletion completed? +* [ ] Is there an associated test? +* [ ] is this currently part of the Conda package? +* [ ] is this properly cleaned up in our local repositories? + +### Tracker / branch / PR & other useful links diff --git a/.travis.yml b/.travis.yml index 2fe883ecece..186a0779a85 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,6 +36,7 @@ addons: - libxml++2.6-dev - perl - python + - python-lxml - texinfo - time - valgrind diff --git a/ODIN_II/verify_odin.sh b/ODIN_II/verify_odin.sh index 0416d9b78f8..089bed1dd82 100755 --- a/ODIN_II/verify_odin.sh +++ b/ODIN_II/verify_odin.sh @@ -64,9 +64,9 @@ function exit_program() { if [ -f ${NEW_RUN_DIR}/test_failures.log ]; then FAIL_COUNT=$(wc -l ${NEW_RUN_DIR}/test_failures.log | cut -d ' ' -f 1) fi - + FAILURE=$(( ${FAIL_COUNT} )) - + if [ "_${FAILURE}" != "_0" ] then echo "Failed ${FAILURE} benchmarks" @@ -107,10 +107,8 @@ _prt_cur_arg() { function help() { printf "Called program with $INPUT - Usage: + Usage: ${THIS_SCRIPT_EXEC} [ OPTIONS / FLAGS ] - - OPTIONS: -h|--help $(_prt_cur_arg off) print this -t|--test < test name > $(_prt_cur_arg ${_TEST}) Test name is one of ( ${TEST_DIR_LIST} heavy_suite light_suite full_suite vtr_basic vtr_strong pre_commit failures debug_sim debug_synth) @@ -121,7 +119,6 @@ printf "Called program with $INPUT -a|--adder_def < /abs/path > $(_prt_cur_arg ${_ADDER_DEF}) Use template to build adders -n|--simulation_count < N > $(_prt_cur_arg ${_SIM_COUNT}) Allow to run the simulation N times to benchmark the simulator -d|--output_dir < /abs/path > $(_prt_cur_arg ${_RUN_DIR_OVERRIDE}) Change the run directory output - FLAGS: -g|--generate_bench $(_prt_cur_arg ${_GENERATE_BENCH}) Generate input and output vector for test -o|--generate_output $(_prt_cur_arg ${_GENERATE_OUTPUT}) Generate output vector for test given its input vector @@ -132,7 +129,6 @@ printf "Called program with $INPUT -b|--batch_sim $(_prt_cur_arg ${_BATCH_SIM}) Use Batch mode multithreaded simulation -p|--perf $(_prt_cur_arg ${_USE_PERF}) Use Perf for monitoring execution -f|--force_simulate $(_prt_cur_arg ${_FORCE_SIM}) Force the simulation to be executed regardless of the config - " } @@ -209,7 +205,7 @@ function cleanup_temp() { fi 
for runs in ${OUTPUT_DIRECTORY}/run* - do + do rm -Rf ${runs} done @@ -267,14 +263,14 @@ function mv_failed() { # Helper Functions function flag_is_number() { case "_$2" in - _) + _) echo "Passed an empty value for $1" help exit 120 ;; *) case $2 in - ''|*[!0-9]*) + ''|*[!0-9]*) echo "Passed a non number value [$2] for $1" help exit 120 @@ -312,7 +308,7 @@ function _set_flag() { _batch_sim_flag=$(_set_if ${_BATCH_SIM} "--batch") _use_best_coverage_flag=$(_set_if ${_BEST_COVERAGE_OFF} "--best_coverage") _perf_flag=$(_set_if ${_USE_PERF} "--tool perf") - + _vector_flag="-g ${_VECTORS}" _timeout_flag="--time_limit ${_TIMEOUT}s" _simulation_threads_flag=$([ "${_SIM_THREADS}" != "1" ] && echo "-j ${_SIM_THREADS}") @@ -323,20 +319,20 @@ function _set_flag() { function parse_args() { while [[ "$#" > 0 ]] - do - case $1 in + do + case $1 in # Help Desk -h|--help) echo "Printing Help information" help exit_program - + ## directory in benchmark ;;-t|--test) # this is handled down stream if [ "_$2" == "_" ] - then + then echo "empty argument for $1" exit 120 fi @@ -349,11 +345,11 @@ function parse_args() { ;;-a|--adder_def) if [ "_$2" == "_" ] - then + then echo "empty argument for $1" exit 120 fi - + _ADDER_DEF=$2 if [ "${_ADDER_DEF}" != "default" ] && [ "${_ADDER_DEF}" != "optimized" ] && [ ! -f "$(readlink -f ${_ADDER_DEF})" ] @@ -367,11 +363,11 @@ function parse_args() { ;;-d|--output_dir) if [ "_$2" == "_" ] - then + then echo "empty argument for $1" exit 120 fi - + _RUN_DIR_OVERRIDE=$2 if [ ! -d "${_RUN_DIR_OVERRIDE}" ] @@ -409,45 +405,45 @@ function parse_args() { shift # Boolean flags - ;;-g|--generate_bench) + ;;-g|--generate_bench) _GENERATE_BENCH="on" echo "generating output vector for test given predefined input" - ;;-o|--generate_output) + ;;-o|--generate_output) _GENERATE_OUTPUT="on" echo "generating input and output vector for test" - ;;-c|--clean) + ;;-c|--clean) echo "Cleaning temporary run in directory" cleanup_temp - ;;-l|--limit_ressource) + ;;-l|--limit_ressource) _LIMIT_RESSOURCE="on" echo "limiting ressources for benchmark, this can help with small hardware" - ;;-v|--valgrind) + ;;-v|--valgrind) _VALGRIND="on" echo "Using Valgrind for benchmarks" - ;;-B|--best_coverage_off) + ;;-B|--best_coverage_off) _BEST_COVERAGE_OFF="off" echo "turning off using best coverage for benchmark vector generation" - ;;-b|--batch_sim) + ;;-b|--batch_sim) _BATCH_SIM="on" echo "Using Batch multithreaded simulation with -j threads" ;;-p|--perf) _USE_PERF="on" echo "Using perf for synthesis and simulation" - - ;;-f|--force_simulate) + + ;;-f|--force_simulate) _FORCE_SIM="on" - echo "Forcing Simulation" + echo "Forcing Simulation" - ;;*) + ;;*) echo "Unknown parameter passed: $1" - help + help ctrl_c esac shift @@ -477,9 +473,9 @@ function sim() { shift while [[ "$#" > 0 ]] - do + do case $1 in - --custom_args_file) + --custom_args_file) with_custom_args=1 ;; @@ -517,7 +513,7 @@ function sim() { *) echo "Unknown internal parameter passed: $1" - config_help + config_help ctrl_c ;; esac @@ -553,15 +549,15 @@ function sim() { ${_timeout_flag} ${_low_ressource_flag} ${_valgrind_flag}" - + if [ "${_USE_PERF}" == "on" ] then wrapper_odin_command="${wrapper_odin_command} ${_perf_flag} ${DIR}/perf.data" fi odin_command="${DEFAULT_CMD_PARAM} - $(cat ${dir}/odin.args | tr '\n' ' ') - -o ${blif_file} + $(cat ${dir}/odin.args | tr '\n' ' ') + -o ${blif_file} -sim_dir ${DIR}" echo $(echo "${wrapper_odin_command} ${odin_command}" | tr '\n' ' ' | tr -s ' ' ) > ${DIR}/odin_param @@ -608,12 +604,6 @@ function sim() { 
for arches in ${arch_list} do - arch_cmd="" - if [ -e ${arches} ] - then - arch_cmd="-a ${arches}" - fi - arch_basename=${arches%.xml} arch_name=${arch_basename##*/} @@ -622,10 +612,18 @@ function sim() { DIR="${NEW_RUN_DIR}/${TEST_FULL_REF}" blif_file="${DIR}/odin.blif" - #build commands mkdir -p $DIR + arch_cmd="" + if [ -e ${arches} ] + then + tiles_cmd="../vtr_flow/scripts/add_tiles.py" + arch_file="${arch_name}.xml" + ${tiles_cmd} --arch_xml ${arches} > $DIR/${arch_name}.xml + arch_cmd="-a $DIR/${arch_name}.xml" + fi + ############################### # Synthesis if [ "${_SYNTHESIS}" == "on" ] @@ -712,7 +710,7 @@ function sim() { #run the simulation find ${NEW_RUN_DIR}/${bench_type}/ -name sim_param | xargs -n1 -P$threads -I sim_cmd ${SHELL} -c '$(cat sim_cmd)' - + # move the log for sim_log in $(find ${NEW_RUN_DIR}/${bench_type}/ -name "simulation.log") do @@ -722,7 +720,7 @@ function sim() { disable_failed ${global_simulation_failure} done - + mkdir -p ${NEW_RUN_DIR}/${bench_type}/vectors # move the vectors @@ -733,7 +731,7 @@ function sim() { cp ${sim_input_vectors} ${NEW_RUN_DIR}/${bench_type}/vectors/${BM_NAME} mv ${sim_input_vectors} ${BM_DIR}/${BM_NAME} - + done @@ -803,7 +801,7 @@ function debug_failures() { echo "Which benchmark would you like to debug (type 'quit' or 'q' to exit)?" echo "============" - echo "${FAILURES_LIST}" + echo "${FAILURES_LIST}" echo "============" printf "enter a substring: " @@ -813,7 +811,7 @@ function debug_failures() { echo "exiting" break ;; - *) + *) BM="${FAILED_RUN_DIR}/$(echo "${FAILURES_LIST}" | grep ${INPUT_BM} | tail -n 1)" if [ "_${BM}" != "_" ] && [ -f "${BM}/${CMD_FILE_NAME}" ] @@ -854,7 +852,7 @@ LIGHT_LIST=( "operators" "arch" "other" - "micro" + "micro" "syntax" "FIR" ) @@ -934,8 +932,8 @@ case ${_TEST} in full_suite) run_all - ;; - + ;; + heavy_suite) run_heavy_suite ;;
diff --git a/README.md b/README.md index d57d1a35bc0..4a41e9f4868 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,14 @@ + +SymbiFlow WIP changes for Verilog to Routing (VTR) +================================================== + +This branch contains work in progress changes for using Verilog to Routing +(VTR) as part of SymbiFlow. + +--- + # Verilog to Routing (VTR) -[![Build Status](https://travis-ci.org/verilog-to-routing/vtr-verilog-to-routing.svg?branch=master)](https://travis-ci.org/verilog-to-routing/vtr-verilog-to-routing) [![Documentation Status](https://readthedocs.org/projects/vtr/badge/?version=latest)](http://docs.verilogtorouting.org/en/latest/?badge=latest) +[![Build Status](https://travis-ci.com/SymbiFlow/vtr-verilog-to-routing.svg?branch=master)](https://travis-ci.com/SymbiFlow/vtr-verilog-to-routing) [![Documentation Status](https://readthedocs.org/projects/vtr/badge/?version=latest)](http://docs.verilogtorouting.org/en/latest/?badge=latest) ## Introduction The Verilog-to-Routing (VTR) project is a world-wide collaborative effort to provide a open-source framework for conducting FPGA architecture and CAD research and development.
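The libarchfpga and VPR changes below introduce the notion of equivalent tiles: a t_type_descriptor can now list other tile types that may host the same block during placement, together with per-tile pin mappings. The verify_odin.sh change above pre-processes each architecture file with vtr_flow/scripts/add_tiles.py (the script itself is not part of this diff), presumably to convert legacy architecture files to the tiles-based format these changes expect. The snippet below is a minimal, self-contained sketch of the lookup pattern the new members support; it is not VTR code, the tile names and pin indices are invented, and only the member names mirror what the diff adds to t_type_descriptor in physical_types.h.

    #include <cstdio>
    #include <unordered_map>
    #include <unordered_set>

    // Simplified stand-in for t_type_descriptor: only the members this sketch needs.
    struct TileType {
        int index = -1;                               // position of the type in the global type array
        int num_equivalent_tiles = 0;                 // how many other tile types may host this block
        std::unordered_map<int, TileType*> equivalent_tiles;                        // [0..num_equivalent_tiles-1]
        std::unordered_map<int, std::unordered_map<int, int>> equivalent_tile_pin_mapping; // per-equivalent-tile pin map
        std::unordered_set<int> available_tiles_indices; // own index plus indices of equivalent tiles

        bool is_available_tile_index(int i) const {
            return available_tiles_indices.count(i) != 0;
        }
    };

    int main() {
        // Two tile types; "slice_l" declares "slice_m" as an equivalent tile, not vice versa.
        TileType slice_l, slice_m;
        slice_l.index = 0;
        slice_m.index = 1;

        slice_l.available_tiles_indices.insert(slice_l.index);

        slice_l.num_equivalent_tiles = 1;
        slice_l.equivalent_tiles[0] = &slice_m;
        slice_l.available_tiles_indices.insert(slice_m.index);
        // Pin 3 of slice_l corresponds to pin 5 of slice_m (indices are made up).
        slice_l.equivalent_tile_pin_mapping[0][3] = 5;

        // Placement-style legality checks: may a block of one type sit on a grid location of the other?
        std::printf("slice_l on slice_m grid location: %s\n",
                    slice_l.is_available_tile_index(slice_m.index) ? "legal" : "illegal");
        std::printf("slice_m on slice_l grid location: %s\n",
                    slice_m.is_available_tile_index(slice_l.index) ? "legal" : "illegal");
        return 0;
    }

This is the same style of check the placer performs in is_legal_swap_to_location and is_legal_blk_swap further down in this diff, where a block may only move to a grid location whose type index appears in its available_tiles_indices.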
diff --git a/libs/libarchfpga/src/physical_types.cpp b/libs/libarchfpga/src/physical_types.cpp index 0e684e37b29..a345c9dd3ae 100644 --- a/libs/libarchfpga/src/physical_types.cpp +++ b/libs/libarchfpga/src/physical_types.cpp @@ -140,6 +140,15 @@ std::vector<int> t_type_descriptor::get_clock_pins_indices() const { return indices; } +bool t_type_descriptor::is_available_tile_index(int index_to_check) const { + auto search = this->available_tiles_indices.find(index_to_check); + if (search != available_tiles_indices.end()) { + return true; + } + + return false; +} + /** * t_pb_graph_node */ diff --git a/libs/libarchfpga/src/physical_types.h b/libs/libarchfpga/src/physical_types.h index 36c1150cc2a..0213261fecf 100644 --- a/libs/libarchfpga/src/physical_types.h +++ b/libs/libarchfpga/src/physical_types.h @@ -31,9 +31,9 @@ #include #include #include -#include #include #include +#include #include "vtr_ndmatrix.h" #include "vtr_hash.h" @@ -556,6 +556,20 @@ constexpr int DEFAULT_SWITCH = -2; * pb_type: Internal subblocks and routing information for this physical block * pb_graph_head: Head of DAG of pb_types_nodes and their edges * + * + * num_equivalent_tiles: Specifies the number of equivalent physical types that can be used during placement. + * If the value is `0`, all the data structures related to the equivalent tiles will be empty. + * equivalent_tiles: Array containing pointers to the equivalent tiles. The number of elements contained is specified + * by num_equivalent_tiles. + * equivalent_tile_pin_mapping: Multi-dimensional array that, for each equivalent tile, contains a mapping between + * the pins of the two tiles. + * Example: equivalent_tile_pin_mapping[eq_tile_index][pin_index] = equivalent_pin_index + * This is necessary to maintain consistency between two equivalent tiles that have the same pins + * defined with different indices. + * equivalent_tile_inverse_pin_mapping: Multi-dimensional array that works as the previous one, but the mapping is inverted in this case. + * Example: equivalent_tile_inverse_pin_mapping[eq_tile_index][equivalent_pin_index] = pin_index + * available_tiles_indices: unordered set used for fast lookup of the available tiles. + * * area: Describes how much area this logic block takes, if undefined, use default * type_timing_inf: timing information unique to this type * num_drivers: Total number of output drivers supplied @@ -595,6 +609,13 @@ struct t_type_descriptor /* TODO rename this. maybe physical type descriptor or t_pb_type* pb_type = nullptr; t_pb_graph_node* pb_graph_head = nullptr; + /* Equivalent tiles information */ + int num_equivalent_tiles = 0; + std::unordered_map<int, t_type_descriptor*> equivalent_tiles; /* [0..num_equivalent_tiles-1] */ + std::unordered_map<int, std::unordered_map<int, int>> equivalent_tile_pin_mapping; /* [0..num_equivalent_tiles-1][0..num_pins-1] */ + std::unordered_map<int, std::unordered_map<int, int>> equivalent_tile_inverse_pin_mapping; /* [0..num_equivalent_tiles-1][0..num_pins-1] */ + std::unordered_set<int> available_tiles_indices; + float area = 0; /* This info can be determined from class_inf and pin_class but stored for faster access */ @@ -603,8 +624,15 @@ struct t_type_descriptor /* TODO rename this.
maybe physical type descriptor or int index = -1; /* index of type descriptor in array (allows for index referencing) */ + /*********** + * Methods * + ***********/ + /* Returns the indices of pins that contain a clock for this physical logic block */ std::vector get_clock_pins_indices() const; + + /* Returns a boolean set to True if the input index belongs to an available tile, False otherwise */ + bool is_available_tile_index(int index_to_check) const; }; typedef const t_type_descriptor* t_type_ptr; @@ -1200,6 +1228,7 @@ struct t_segment_inf { std::vector cb; std::vector sb; //float Cmetal_per_m; /* Wire capacitance (per meter) */ + t_metadata_dict* meta = nullptr; }; enum class SwitchType { @@ -1225,6 +1254,7 @@ enum class BufferSize { * R: Equivalent resistance of the buffer/switch. * * Cin: Input capacitance. * * Cout: Output capacitance. * + * Cinternal: Internal capacitance in a buffer with fanout. * * Tdel_map: A map where the key is the number of inputs and the entry * * is the corresponding delay. If there is only one entry at key * * UNDEFINED, then delay is a constant (doesn't vary with fan-in). * @@ -1242,6 +1272,7 @@ struct t_arch_switch_inf { float R = 0.; float Cin = 0.; float Cout = 0.; + float Cinternal = 0.; // defined the property Cinternal float mux_trans_size = 1.; BufferSize buf_size_type = BufferSize::AUTO; float buf_size = 0.; @@ -1293,6 +1324,7 @@ struct t_arch_switch_inf { * R: Equivalent resistance of the buffer/switch. * * Cin: Input capacitance. * * Cout: Output capacitance. * + * Cinternal: Internal capacitance in a buffer. * * Tdel: Intrinsic delay. The delay through an unloaded switch is * * Tdel + R * Cout. * * mux_trans_size: The area of each transistor in the segment's driving mux * @@ -1303,6 +1335,7 @@ struct t_rr_switch_inf { float R = 0.; float Cin = 0.; float Cout = 0.; + float Cinternal = 0.; //defined the property Cinternal float Tdel = 0.; float mux_trans_size = 0.; float buf_size = 0.; diff --git a/libs/libarchfpga/src/read_xml_arch_file.cpp b/libs/libarchfpga/src/read_xml_arch_file.cpp index 519b0287b1d..1a62ad7dcdd 100644 --- a/libs/libarchfpga/src/read_xml_arch_file.cpp +++ b/libs/libarchfpga/src/read_xml_arch_file.cpp @@ -101,7 +101,7 @@ static void Process_Fc(pugi::xml_node Node, t_type_descriptor* Type, std::vector static t_fc_override Process_Fc_override(pugi::xml_node node, const pugiutil::loc_data& loc_data); static void ProcessSwitchblockLocations(pugi::xml_node swtichblock_locations, t_type_descriptor* type, const t_arch& arch, const pugiutil::loc_data& loc_data); static e_fc_value_type string_to_fc_value_type(const std::string& str, pugi::xml_node node, const pugiutil::loc_data& loc_data); -static void ProcessComplexBlockProps(pugi::xml_node Node, t_type_descriptor* Type, const pugiutil::loc_data& loc_data); +static void ProcessTileProps(pugi::xml_node Node, t_type_descriptor* Type, const pugiutil::loc_data& loc_data); static void ProcessChanWidthDistr(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data); @@ -111,12 +111,29 @@ static void ProcessModelPorts(pugi::xml_node port_group, t_model* model, std::se static void ProcessLayout(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data); static t_grid_def ProcessGridLayout(pugi::xml_node layout_type_tag, const pugiutil::loc_data& loc_data); static void ProcessDevice(pugi::xml_node Node, t_arch* arch, t_default_fc_spec& arch_def_fc, const pugiutil::loc_data& loc_data); +static void ProcessTiles(pugi::xml_node Node, + t_type_descriptor** Types, + int* 
NumTypes, + std::unordered_map* TypeMap, + const pugiutil::loc_data& loc_data); +static void ProcessTilesTags(pugi::xml_node Node, + std::unordered_map TypeMap, + t_arch& arch, + const t_default_fc_spec& arch_def_fc, + const pugiutil::loc_data& loc_data); +static void ProcessTileExtraModes(pugi::xml_node Node, + t_type_descriptor* Type, + std::unordered_map TypeMap, + const pugiutil::loc_data& loc_data); +static void ProcessTileExtraModePinMapping(pugi::xml_node Node, + t_type_descriptor* Type, + t_type_descriptor* EquivalentType, + int imode, + const pugiutil::loc_data& loc_data); static void ProcessComplexBlocks(pugi::xml_node Node, - t_type_descriptor** Types, - int* NumTypes, + std::unordered_map TypeMap, t_arch& arch, const bool timing_enabled, - const t_default_fc_spec& arch_def_fc, const pugiutil::loc_data& loc_data); static void ProcessSwitches(pugi::xml_node Node, t_arch_switch_inf** Switches, @@ -176,6 +193,8 @@ int find_switch_by_name(const t_arch& arch, std::string switch_name); e_side string_to_side(std::string side_str); +static t_type_descriptor* get_corresponding_tile(std::unordered_map TypeMap, const char* type_name); +static int get_pin_index_by_name(t_type_descriptor* Type, const char* port_name, int offset); /* * * @@ -254,9 +273,18 @@ void XmlReadArch(const char* ArchFile, const bool timing_enabled, t_arch* arch, ProcessSwitchblocks(Next, arch, loc_data); } - /* Process types */ + /* Process tiles */ + std::unordered_map TypeMap; + Next = get_single_child(architecture, "tiles", loc_data); + ProcessTiles(Next, Types, NumTypes, &TypeMap, loc_data); + + /* Process pb_types */ Next = get_single_child(architecture, "complexblocklist", loc_data); - ProcessComplexBlocks(Next, Types, NumTypes, *arch, timing_enabled, arch_def_fc, loc_data); + ProcessComplexBlocks(Next, TypeMap, *arch, timing_enabled, loc_data); + + /* Process tile tags that after pb_type have been parsed */ + Next = get_single_child(architecture, "tiles", loc_data); + ProcessTilesTags(Next, TypeMap, *arch, arch_def_fc, loc_data); /* Process directs */ Next = get_single_child(architecture, "directlist", loc_data, OPTIONAL); @@ -969,14 +997,6 @@ static void ProcessPb_Type(pugi::xml_node Parent, t_pb_type* pb_type, t_mode* mo children_to_expect.push_back("model"); children_to_expect.push_back("pb_type"); children_to_expect.push_back("interconnect"); - - if (is_root_pb_type) { - VTR_ASSERT(!is_leaf_pb_type); - //Top level pb_type's may also have the following tag types - children_to_expect.push_back("fc"); - children_to_expect.push_back("pinlocations"); - children_to_expect.push_back("switchblock_locations"); - } } else { VTR_ASSERT(is_leaf_pb_type); VTR_ASSERT(!is_root_pb_type); @@ -1698,7 +1718,7 @@ static void Process_Fc(pugi::xml_node Node, t_type_descriptor* Type, std::vector /* Use the default value, if available */ if (!arch_def_fc.specified) { archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), - " is missing child , and no specified in architecture\n"); + " is missing child , and no specified in architecture\n"); } def_fc_spec = arch_def_fc; } @@ -2017,28 +2037,6 @@ static void ProcessSwitchblockLocations(pugi::xml_node switchblock_locations, t_ } } -/* Thie processes attributes of the 'type' tag */ -static void ProcessComplexBlockProps(pugi::xml_node Node, t_type_descriptor* Type, const pugiutil::loc_data& loc_data) { - const char* Prop; - - expect_only_attributes(Node, {"name", "capacity", "width", "height", "area"}, loc_data); - - /* Load type name */ - Prop = get_attribute(Node, "name", 
loc_data).value(); - Type->name = vtr::strdup(Prop); - - /* Load properties */ - Type->capacity = get_attribute(Node, "capacity", loc_data, OPTIONAL).as_uint(1); /* TODO: Any block with capacity > 1 that is not I/O has not been tested, must test */ - Type->width = get_attribute(Node, "width", loc_data, OPTIONAL).as_uint(1); - Type->height = get_attribute(Node, "height", loc_data, OPTIONAL).as_uint(1); - Type->area = get_attribute(Node, "area", loc_data, OPTIONAL).as_float(UNDEFINED); - - if (atof(Prop) < 0) { - archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), - "Area for type %s must be non-negative\n", Type->name); - } -} - /* Takes in node pointing to and loads all the * child type objects. */ static void ProcessModels(pugi::xml_node Node, t_arch* arch, const pugiutil::loc_data& loc_data) { @@ -2620,16 +2618,36 @@ static void ProcessChanWidthDistrDir(pugi::xml_node Node, t_chan* chan, const pu chan->dc = get_attribute(Node, "dc", loc_data, hasDc).as_float(0); } -/* Takes in node pointing to and loads all the - * child type objects. */ -static void ProcessComplexBlocks(pugi::xml_node Node, - t_type_descriptor** Types, - int* NumTypes, - t_arch& arch, - const bool timing_enabled, - const t_default_fc_spec& arch_def_fc, - const pugiutil::loc_data& loc_data) { - pugi::xml_node CurType, Prev; +/* Thie processes attributes of the 'type' tag */ +static void ProcessTileProps(pugi::xml_node Node, t_type_descriptor* Type, const pugiutil::loc_data& loc_data) { + const char* Prop; + + expect_only_attributes(Node, {"name", "capacity", "width", "height", "area"}, loc_data); + + /* Load type name */ + Prop = get_attribute(Node, "name", loc_data).value(); + Type->name = vtr::strdup(Prop); + + /* Load properties */ + Type->capacity = get_attribute(Node, "capacity", loc_data, OPTIONAL).as_uint(1); /* TODO: Any block with capacity > 1 that is not I/O has not been tested, must test */ + Type->width = get_attribute(Node, "width", loc_data, OPTIONAL).as_uint(1); + Type->height = get_attribute(Node, "height", loc_data, OPTIONAL).as_uint(1); + Type->area = get_attribute(Node, "area", loc_data, OPTIONAL).as_float(UNDEFINED); + + if (atof(Prop) < 0) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), + "Area for type %s must be non-negative\n", Type->name); + } +} + +/* Takes in node pointing to and loads all the * + * child type objects. */ +static void ProcessTiles(pugi::xml_node Node, + t_type_descriptor** Types, + int* NumTypes, + std::unordered_map* TypeMap, + const pugiutil::loc_data& loc_data) { + pugi::xml_node CurType; pugi::xml_node Cur; t_type_descriptor* Type; int i; @@ -2638,7 +2656,7 @@ static void ProcessComplexBlocks(pugi::xml_node Node, /* Alloc the type list. 
Need one additional t_type_desctiptors: * 1: empty psuedo-type */ - *NumTypes = count_children(Node, "pb_type", loc_data) + 1; + *NumTypes = count_children(Node, "tile", loc_data) + 1; *Types = new t_type_descriptor[*NumTypes]; cb_type_descriptors = *Types; @@ -2654,30 +2672,63 @@ static void ProcessComplexBlocks(pugi::xml_node Node, CurType = Node.first_child(); while (CurType) { - check_node(CurType, "pb_type", loc_data); + check_node(CurType, "tile", loc_data); /* Alias to current type */ Type = &(*Types)[i]; /* Parses the properties fields of the type */ - ProcessComplexBlockProps(CurType, Type, loc_data); + ProcessTileProps(CurType, Type, loc_data); ret_pb_type_descriptors = pb_type_descriptors.insert(pair(Type->name, 0)); if (!ret_pb_type_descriptors.second) { archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), - "Duplicate pb_type descriptor name: '%s'.\n", Type->name); + "Duplicate tile descriptor name: '%s'.\n", Type->name); } - /* Load pb_type info */ - Type->pb_type = new t_pb_type; - Type->pb_type->name = vtr::strdup(Type->name); - ProcessPb_Type(CurType, Type->pb_type, nullptr, timing_enabled, arch, loc_data); - Type->num_pins = Type->capacity - * (Type->pb_type->num_input_pins - + Type->pb_type->num_output_pins - + Type->pb_type->num_clock_pins); - Type->num_receivers = Type->capacity * Type->pb_type->num_input_pins; - Type->num_drivers = Type->capacity * Type->pb_type->num_output_pins; + Type->index = i; + Type->available_tiles_indices.insert(i); + + auto result = TypeMap->insert(std::make_pair(Type->name, Type)); + if (!result.second) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), + "Duplicate tile found: '%s'.\n", Type->name); + } + + /* Type fully read */ + ++i; + + /* Free this node and get its next sibling node */ + CurType = CurType.next_sibling(CurType.name()); + } + pb_type_descriptors.clear(); +} + +// This step has to be performed after the root pb_type has been parsed +static void ProcessTilesTags(pugi::xml_node Node, + std::unordered_map TypeMap, + t_arch& arch, + const t_default_fc_spec& arch_def_fc, + const pugiutil::loc_data& loc_data) { + pugi::xml_node Cur, CurType; + t_type_descriptor* Type; + + /* Process the types */ + CurType = Node.first_child(); + while (CurType) { + check_node(CurType, "tile", loc_data); + + /* Load type name */ + const char* NameProp = get_attribute(CurType, "name", loc_data).value(); + + /* Alias to current type */ + Type = get_corresponding_tile(TypeMap, vtr::strdup(NameProp)); + if (Type == nullptr) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), + "No tiles found corresponding to current root level pb type: '%s'.\n", Type->pb_type->name); + } + + VTR_ASSERT(Type->pb_type != nullptr); /* Load pin names and classes and locations */ Cur = get_single_child(CurType, "pinlocations", loc_data, OPTIONAL); @@ -2698,19 +2749,178 @@ static void ProcessComplexBlocks(pugi::xml_node Node, Cur = get_single_child(CurType, "fc", loc_data, OPTIONAL); Process_Fc(Cur, Type, arch.Segments, arch_def_fc, loc_data); - //Load switchblock type and location overrides + /* Load switchblock type and location overrides */ Cur = get_single_child(CurType, "switchblock_locations", loc_data, OPTIONAL); ProcessSwitchblockLocations(Cur, Type, arch, loc_data); - Type->index = i; - - /* Type fully read */ - ++i; + /* Load possible modes (pb_types which are compatible with the current tile) */ + Cur = get_single_child(CurType, "equivalent_tiles", loc_data, OPTIONAL); + if (Cur) { + ProcessTileExtraModes(Cur, 
Type, TypeMap, loc_data); + } /* Free this node and get its next sibling node */ CurType = CurType.next_sibling(CurType.name()); } - pb_type_descriptors.clear(); +} + +/* Processes the equivalent tiles defined in the XML arch definition, e.g.: + * + * <tile name="tile_name"> + * ... + * <equivalent_tiles> + * <mode name="equivalent_tile_name"> + * <map from="tile_port" to="equivalent_tile_port" num_pins="N"/> + * </mode> + * </equivalent_tiles> + * </tile> + * + * In particular this function parses the `modes` (if they exist) of each tile + * and adds the equivalent tile information to the t_type_descriptor relative to + * the current tile. + * It populates the following t_type_descriptor members: + * - num_equivalent_tiles; + * - equivalent_tiles. + */ +static void ProcessTileExtraModes(pugi::xml_node Node, + t_type_descriptor* Type, + std::unordered_map<std::string, t_type_descriptor*> TypeMap, + const pugiutil::loc_data& loc_data) { + pugi::xml_node CurType; + + Type->num_equivalent_tiles = count_children(Node, "mode", loc_data); + int index = 0; + CurType = Node.first_child(); + while (CurType && index < Type->num_equivalent_tiles) { + const char* equivalent_tile_name = get_attribute(CurType, "name", loc_data).value(); + auto EquivalentTile = get_corresponding_tile(TypeMap, equivalent_tile_name); + + if (EquivalentTile == nullptr) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), + "No tiles found corresponding to equivalent tile name: '%s'.\n", equivalent_tile_name); + } + + // Inserts equivalent tile as last element so the index points to the correct equivalent tile. + auto result = Type->equivalent_tiles.insert(std::make_pair(index, EquivalentTile)); + if (!result.second) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), + "Duplicate equivalent tile found: '%s'.\n", EquivalentTile->name); + } + + Type->available_tiles_indices.insert(EquivalentTile->index); + + ProcessTileExtraModePinMapping(CurType, Type, Type->equivalent_tiles[index], index, loc_data); + + index++; + CurType = CurType.next_sibling(CurType.name()); + } +} + +/* Processes the pin_mapping of each equivalent tile. + * It goes through each mode and populates the following t_type_descriptor members: + * - equivalent_tile_pin_mapping; + * - equivalent_tile_inverse_pin_mapping.
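+ * + * Illustrative example (port names and pin indices invented): a + * <map from="out" to="o" num_pins="2"/> + * entry, where pins out[0..1] of this tile resolve to pin indices 6 and 7 and pins o[0..1] of the equivalent tile resolve to 4 and 5, yields + * equivalent_tile_pin_mapping[imode] = {6->4, 7->5} + * equivalent_tile_inverse_pin_mapping[imode] = {4->6, 5->7}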
+ */ +static void ProcessTileExtraModePinMapping(pugi::xml_node Node, + t_type_descriptor* Type, + t_type_descriptor* EquivalentType, + int imode, + const pugiutil::loc_data& loc_data) { + pugi::xml_node CurType = Node.first_child(); + const char *from_port, *to_port; + int from_pin_index, to_pin_index; + int num_pins; + + std::unordered_map pin_mapping, inverse_pin_mapping; + + while (CurType) { + //Process each mode mapping + if (CurType.name() != std::string("map")) { + bad_tag(CurType, loc_data, Node, {"map"}); + } + + from_port = get_attribute(CurType, "from", loc_data).value(); + to_port = get_attribute(CurType, "to", loc_data).value(); + num_pins = get_attribute(CurType, "num_pins", loc_data, OPTIONAL).as_int(1); + + for (int offset = 0; offset < num_pins; offset++) { + from_pin_index = get_pin_index_by_name(Type, from_port, offset); + to_pin_index = get_pin_index_by_name(EquivalentType, to_port, offset); + + auto result = pin_mapping.insert(std::make_pair(from_pin_index, to_pin_index)); + if (!result.second) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), + "Duplicate equivalent tile 'from_pin': '%d' (in %s).\n", from_pin_index, Type->name); + } + + result = inverse_pin_mapping.insert(std::make_pair(to_pin_index, from_pin_index)); + if (!result.second) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurType), + "Duplicate equivalent tile 'to_pin': '%d' (in %s).\n", to_pin_index, Type->name); + } + } + + CurType = CurType.next_sibling(CurType.name()); + } + + Type->equivalent_tile_pin_mapping.insert(std::make_pair(imode, pin_mapping)); + Type->equivalent_tile_inverse_pin_mapping.insert(std::make_pair(imode, inverse_pin_mapping)); +} + +static void ProcessComplexBlocks(pugi::xml_node Node, + std::unordered_map TypeMap, + t_arch& arch, + const bool timing_enabled, + const pugiutil::loc_data& loc_data) { + pugi::xml_node CurPbType; + t_type_descriptor* Type; + + map pb_types; + pair::iterator, bool> ret_pb_types; + + CurPbType = Node.first_child(); + while (CurPbType) { + check_node(CurPbType, "pb_type", loc_data); + + char* type_name = nullptr; + + for (pugi::xml_attribute attr : CurPbType.attributes()) { + if (attr.name() != std::string("name")) { + bad_attribute(attr, CurPbType, loc_data); + } else { + type_name = vtr::strdup(attr.value()); + } + } + + Type = get_corresponding_tile(TypeMap, type_name); + if (Type == nullptr) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurPbType), + "No tiles found corresponding to current root level pb type: '%s'.\n", type_name); + } + + Type->pb_type = new t_pb_type; + Type->pb_type->name = vtr::strdup(type_name); + + ret_pb_types = pb_types.insert( + pair(Type->pb_type->name, 0)); + if (!ret_pb_types.second) { + archfpga_throw(loc_data.filename_c_str(), loc_data.line(CurPbType), + "Duplicate pb_type descriptor name: '%s'.\n", Type->pb_type->name); + } + + ProcessPb_Type(CurPbType, Type->pb_type, nullptr, timing_enabled, arch, loc_data); + Type->num_pins = Type->capacity + * (Type->pb_type->num_input_pins + + Type->pb_type->num_output_pins + + Type->pb_type->num_clock_pins); + Type->num_receivers = Type->capacity * Type->pb_type->num_input_pins; + Type->num_drivers = Type->capacity * Type->pb_type->num_output_pins; + + /* Load pin names and classes and locations */ + + CurPbType = CurPbType.next_sibling(CurPbType.name()); + } + pb_types.clear(); } static void ProcessSegments(pugi::xml_node Parent, @@ -3074,23 +3284,23 @@ static void ProcessSwitches(pugi::xml_node Parent, SwitchType type = 
SwitchType::MUX; if (0 == strcmp(type_name, "mux")) { type = SwitchType::MUX; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size", "mux_trans_size"}, " with type '"s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "buf_size", "power_buf_size", "mux_trans_size"}, " with type '"s + type_name + "'"s, loc_data); // buffered switch should have a Cinternal element } else if (0 == strcmp(type_name, "tristate")) { type = SwitchType::TRISTATE; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Cinternal", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data); // buffered switch should have a Cinternal element } else if (0 == strcmp(type_name, "buffer")) { type = SwitchType::BUFFER; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel", "buf_size", "power_buf_size"}, " with type '"s + type_name + "'"s, loc_data); // buffer should not have a Cinternal element } else if (0 == strcmp(type_name, "pass_gate")) { type = SwitchType::PASS_GATE; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type '"s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type '"s + type_name + "'"s, loc_data); // unbuffered switch does not have Cinternal element } else if (0 == strcmp(type_name, "short")) { type = SwitchType::SHORT; - expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type "s + type_name + "'"s, loc_data); + expect_only_attributes(Node, {"type", "name", "R", "Cin", "Cout", "Tdel"}, " with type "s + type_name + "'"s, loc_data); // unbuffered switch does not have Cinternal element } else { archfpga_throw(loc_data.filename_c_str(), loc_data.line(Node), @@ -3102,6 +3312,8 @@ static void ProcessSwitches(pugi::xml_node Parent, ReqOpt COUT_REQD = TIMING_ENABLE_REQD; ReqOpt CIN_REQD = TIMING_ENABLE_REQD; + ReqOpt CINTERNAL_REQD = OPTIONAL; //defined the parameter + if (arch_switch.type() == SwitchType::SHORT) { //Cin/Cout are optional on shorts, since they really only have one capacitance CIN_REQD = OPTIONAL; @@ -3109,6 +3321,7 @@ static void ProcessSwitches(pugi::xml_node Parent, } arch_switch.Cin = get_attribute(Node, "Cin", loc_data, CIN_REQD).as_float(0); arch_switch.Cout = get_attribute(Node, "Cout", loc_data, COUT_REQD).as_float(0); + arch_switch.Cinternal = get_attribute(Node, "Cinternal", loc_data, CINTERNAL_REQD).as_float(0); // retrieve the optional parameter if (arch_switch.type() == SwitchType::MUX) { //Only muxes have mux transistors @@ -4114,3 +4327,41 @@ e_side string_to_side(std::string side_str) { } return side; } + +static t_type_descriptor* get_corresponding_tile(std::unordered_map TypeMap, + const char* type_name) { + auto result = TypeMap.find(type_name); + + if (result == TypeMap.end()) { + return nullptr; + } + + return result->second; +} + +static int get_pin_index_by_name(t_type_descriptor* Type, const char* port_name, int pin_index_in_port) { + int ipin = OPEN; + + t_pb_type* pb_type = Type->pb_type; + t_port* matched_port = nullptr; + int port_base_ipin = 0; + + for (int 
iport = 0; iport < pb_type->num_ports; ++iport) { + t_port* port = &pb_type->ports[iport]; + + if (0 == strcmp(port->name, port_name)) { + matched_port = port; + break; + } + port_base_ipin += port->num_pins; + } + + if (matched_port) { + VTR_ASSERT(0 == strcmp(matched_port->name, port_name)); + VTR_ASSERT(pin_index_in_port < matched_port->num_pins); + + ipin = port_base_ipin + pin_index_in_port; + } + + return ipin; +} diff --git a/libs/libvtrutil/src/vtr_log.cpp b/libs/libvtrutil/src/vtr_log.cpp index c0ae90759de..55d850c514a 100644 --- a/libs/libvtrutil/src/vtr_log.cpp +++ b/libs/libvtrutil/src/vtr_log.cpp @@ -1,5 +1,9 @@ -#include "vtr_log.h" +#include +#include +#include +#include "vtr_util.h" +#include "vtr_log.h" #include "log.h" namespace vtr { @@ -14,3 +18,33 @@ void set_log_file(const char* filename) { } } // namespace vtr + +void add_warnings_to_suppress(std::string function_name) { + warnings_to_suppress.insert(function_name); +} + +void set_noisy_warn_log_file(const char* log_file_name) { + std::ofstream log; + log.open(log_file_name, std::ifstream::out | std::ifstream::trunc); + log.close(); + noisy_warn_log_file = std::string(log_file_name); +} + +void suppress_warning(const char* pszFileName, unsigned int lineNum, const char* pszFuncName, const char* pszMessage, ...) { + std::string function_name(pszFuncName); + + va_list va_args; + va_start(va_args, pszMessage); + std::string msg = vtr::vstring_fmt(pszMessage, va_args); + va_end(va_args); + + auto result = warnings_to_suppress.find(function_name); + if (result == warnings_to_suppress.end()) { + vtr::printf_warning(pszFileName, lineNum, msg.data()); + } else { + std::ofstream log; + log.open(noisy_warn_log_file.data(), std::ios_base::app); + log << "Warning:\n\tfile: " << pszFileName << "\n\tline: " << lineNum << "\n\tmessage: " << msg << std::endl; + log.close(); + } +} diff --git a/libs/libvtrutil/src/vtr_log.h b/libs/libvtrutil/src/vtr_log.h index 878653ba84d..b7bc2dceadc 100644 --- a/libs/libvtrutil/src/vtr_log.h +++ b/libs/libvtrutil/src/vtr_log.h @@ -1,6 +1,8 @@ #ifndef VTR_LOG_H #define VTR_LOG_H #include +#include +#include /* * This header defines useful logging macros for VTR projects. @@ -71,15 +73,18 @@ #define VTR_LOGF_ERROR(file, line, ...) VTR_LOGVF_ERROR(true, file, line, __VA_ARGS__) #define VTR_LOGF_NOP(file, line, ...) VTR_LOGVF_NOP(true, file, line, __VA_ARGS__) +//Custom file-line-func location logging macros +#define VTR_LOGFF_WARN(file, line, func, ...) VTR_LOGVFF_WARN(true, file, line, func, __VA_ARGS__) + //Conditional logging and custom file-line location macros #define VTR_LOGVF(expr, file, line, ...) \ do { \ if (expr) vtr::printf(__VA_ARGS__); \ } while (false) -#define VTR_LOGVF_WARN(expr, file, line, ...) \ - do { \ - if (expr) vtr::printf_warning(file, line, __VA_ARGS__); \ +#define VTR_LOGVF_WARN(expr, file, line, ...) \ + do { \ + if (expr) suppress_warning(file, line, __func__, __VA_ARGS__); \ } while (false) #define VTR_LOGVF_ERROR(expr, file, line, ...) \ @@ -87,6 +92,12 @@ if (expr) vtr::printf_error(file, line, __VA_ARGS__); \ } while (false) +// Conditional logging and custom file-line-func location macros +#define VTR_LOGVFF_WARN(expr, file, line, func, ...) \ + do { \ + if (expr) suppress_warning(file, line, func, __VA_ARGS__); \ + } while (false) + //No-op version of logging macro which avoids unused parameter warnings. 
// //Note that to avoid unused parameter warnings we call sizeof() and cast @@ -129,4 +140,14 @@ void set_log_file(const char* filename); } // namespace vtr +// The following data structure and functions allow to suppress noisy warnings +// and direct them into an external file. +static std::unordered_set warnings_to_suppress; +static std::string noisy_warn_log_file; + +void add_warnings_to_suppress(std::string function_name); +void set_noisy_warn_log_file(const char* log_file_name); + +void suppress_warning(const char* pszFileName, unsigned int lineNum, const char* pszFuncName, const char* pszMessage, ...); + #endif diff --git a/utils/fasm/src/fasm.cpp b/utils/fasm/src/fasm.cpp index ee7c7cd8fd1..9797f97e81c 100644 --- a/utils/fasm/src/fasm.cpp +++ b/utils/fasm/src/fasm.cpp @@ -93,7 +93,11 @@ void FasmWriterVisitor::check_interconnect(const t_pb_routes &pb_routes, int ino return; } - t_pb_graph_pin *prev_pin = pb_graph_pin_lookup_from_index_by_type_.at(blk_type_->index)[prev_node]; + auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + + t_type_ptr original_blk_type = clb_nlist.block_type(current_blk_id_, false); + + t_pb_graph_pin *prev_pin = pb_graph_pin_lookup_from_index_by_type_.at(original_blk_type->index)[prev_node]; int prev_edge; for(prev_edge = 0; prev_edge < prev_pin->num_output_edges; prev_edge++) { diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 06ac0c2c237..879561b4af6 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -347,8 +347,8 @@ static void SetupRouterOpts(const t_options& Options, t_router_opts* RouterOpts) RouterOpts->max_convergence_count = Options.router_max_convergence_count; RouterOpts->reconvergence_cpd_threshold = Options.router_reconvergence_cpd_threshold; RouterOpts->first_iteration_timing_report_file = Options.router_first_iteration_timing_report_file; - RouterOpts->strict_checks = Options.strict_checks; + RouterOpts->disable_check_route = Options.disable_check_route; } static void SetupAnnealSched(const t_options& Options, diff --git a/vpr/src/base/clustered_netlist.cpp b/vpr/src/base/clustered_netlist.cpp index eb69053c404..de5f5f097e6 100644 --- a/vpr/src/base/clustered_netlist.cpp +++ b/vpr/src/base/clustered_netlist.cpp @@ -25,11 +25,30 @@ t_pb* ClusteredNetlist::block_pb(const ClusterBlockId id) const { } t_type_ptr ClusteredNetlist::block_type(const ClusterBlockId id) const { + return block_type(id, true); +} + +t_type_ptr ClusteredNetlist::block_type(const ClusterBlockId id, bool get_equivalent_if_set) const { VTR_ASSERT_SAFE(valid_block_id(id)); + if (block_eq_type_index(id) != OPEN && get_equivalent_if_set) { + return block_eq_type_[id]; + } return block_types_[id]; } +int ClusteredNetlist::block_eq_type_index(const ClusterBlockId id) const { + VTR_ASSERT_SAFE(valid_block_id(id)); + + return block_eq_type_index_[id]; +} + +bool ClusteredNetlist::block_eq_type_effective(const ClusterBlockId id) const { + VTR_ASSERT_SAFE(valid_block_id(id)); + + return block_eq_type_effective_[id]; +} + ClusterNetId ClusteredNetlist::block_net(const ClusterBlockId blk_id, const int phys_pin_index) const { auto pin_id = block_pin(blk_id, phys_pin_index); @@ -120,9 +139,12 @@ ClusterBlockId ClusteredNetlist::create_block(const char* name, t_pb* pb, t_type block_pbs_.insert(blk_id, pb); block_types_.insert(blk_id, type); - + block_eq_type_.insert(blk_id, type); + block_eq_type_index_.insert(blk_id, OPEN); + block_eq_type_effective_.insert(blk_id, false); //Allocate and initialize every potential pin of the block - 
block_logical_pins_.insert(blk_id, std::vector(type->num_pins, ClusterPinId::INVALID())); + int num_pins = get_max_num_pins(type); + block_logical_pins_.insert(blk_id, std::vector(num_pins, ClusterPinId::INVALID())); } //Check post-conditions: size @@ -135,6 +157,12 @@ ClusterBlockId ClusteredNetlist::create_block(const char* name, t_pb* pb, t_type return blk_id; } +void ClusteredNetlist::set_equivalent_block_type(const ClusterBlockId blk_id, int i_eq_type, t_type_ptr eq_type) { + block_eq_type_index_[blk_id] = i_eq_type; + block_eq_type_effective_[blk_id] = true; + block_eq_type_[blk_id] = eq_type; +} + void ClusteredNetlist::set_pin_physical_index(const ClusterPinId pin, const int phys_pin_index) { VTR_ASSERT_SAFE(valid_pin_id(pin)); auto blk = pin_block(pin); @@ -322,3 +350,20 @@ bool ClusteredNetlist::validate_net_sizes_impl(size_t num_nets) const { } return true; } + +/* + * Utilities + */ +int ClusteredNetlist::get_max_num_pins(t_type_ptr type) { + int max_pins = type->num_pins; + + for (int itype = 0; itype < type->num_equivalent_tiles; itype++) { + auto result = type->equivalent_tiles.find(itype); + VTR_ASSERT(result != type->equivalent_tiles.end()); + + int num_pins = result->second->num_pins; + max_pins = std::max(num_pins, max_pins); + } + + return max_pins; +} diff --git a/vpr/src/base/clustered_netlist.h b/vpr/src/base/clustered_netlist.h index 343cffaa9b9..8b3f34fb1ff 100644 --- a/vpr/src/base/clustered_netlist.h +++ b/vpr/src/base/clustered_netlist.h @@ -125,8 +125,25 @@ class ClusteredNetlist : public Netlistequivalent_tiles[index] + int block_eq_type_index(const ClusterBlockId id) const; + + //Returns true if the block has been placed in an equivalent tile + bool block_eq_type_effective(const ClusterBlockId id) const; //Returns the net of the block attached to the specific pin index ClusterNetId block_net(const ClusterBlockId blk_id, const int pin_index) const; @@ -174,6 +191,13 @@ class ClusteredNetlist : public Netlist block_pbs_; //Physical block representing the clustering & internal hierarchy of each CLB vtr::vector_map block_types_; //The type of physical block this user circuit block is mapped to + vtr::vector_map block_eq_type_; //The equivalent type (if any) selected for a CLB + vtr::vector_map block_eq_type_index_; //Index relative to the equivalent tile chosen during placement + vtr::vector_map block_eq_type_effective_; //Boolean to state if equivalent tile is used vtr::vector_map> block_logical_pins_; //The logical pin associated with each physical block pin //Pins diff --git a/vpr/src/base/echo_files.cpp b/vpr/src/base/echo_files.cpp index d195c7d3871..e35b04c6da0 100644 --- a/vpr/src/base/echo_files.cpp +++ b/vpr/src/base/echo_files.cpp @@ -112,6 +112,8 @@ void alloc_and_load_echo_file_info() { setEchoFileName(E_ECHO_CHAN_DETAILS, "chan_details.txt"); setEchoFileName(E_ECHO_SBLOCK_PATTERN, "sblock_pattern.txt"); setEchoFileName(E_ECHO_ENDPOINT_TIMING, "endpoint_timing.echo.json"); + + setEchoFileName(E_ECHO_LOOKAHEAD_MAP, "lookahead_map.echo"); } void free_echo_file_info() { diff --git a/vpr/src/base/echo_files.h b/vpr/src/base/echo_files.h index 2aa9253617b..3a3507f60ca 100644 --- a/vpr/src/base/echo_files.h +++ b/vpr/src/base/echo_files.h @@ -43,6 +43,7 @@ enum e_echo_files { E_ECHO_CHAN_DETAILS, E_ECHO_SBLOCK_PATTERN, E_ECHO_ENDPOINT_TIMING, + E_ECHO_LOOKAHEAD_MAP, //Timing Graphs E_ECHO_PRE_PACKING_TIMING_GRAPH, diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 28470df9142..902fd0d616c 100644 --- 
a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -648,6 +648,8 @@ struct ParseRouterLookahead { conv_value.set_value(e_router_lookahead::CLASSIC); else if (str == "map") conv_value.set_value(e_router_lookahead::MAP); + else if (str == "connection_box_map") + conv_value.set_value(e_router_lookahead::CONNECTION_BOX_MAP); else { std::stringstream msg; msg << "Invalid conversion from '" @@ -661,17 +663,22 @@ struct ParseRouterLookahead { ConvertedValue to_str(e_router_lookahead val) { ConvertedValue conv_value; - if (val == e_router_lookahead::CLASSIC) + if (val == e_router_lookahead::CLASSIC) { conv_value.set_value("classic"); - else { - VTR_ASSERT(val == e_router_lookahead::MAP); + } else if (val == e_router_lookahead::MAP) { conv_value.set_value("map"); + } else if (val == e_router_lookahead::CONNECTION_BOX_MAP) { + conv_value.set_value("connection_box_map"); + } else { + std::stringstream msg; + msg << "Unrecognized e_router_lookahead"; + conv_value.set_error(msg.str()); } return conv_value; } std::vector default_choices() { - return {"classic", "map"}; + return {"classic", "map", "connection_box_map"}; } }; @@ -931,6 +938,32 @@ static argparse::ArgumentParser create_arg_parser(std::string prog_name, t_optio .default_value("on") .show_in(argparse::ShowIn::HELP_ONLY); + gen_grp.add_argument(args.disable_errors, "--disable_errors") + .help( + "Parses a list of functions for which the errors are going to be treated as warnings.\n" + "Each function in the list is delimited by `:`\n" + "This option should be only used for development purposes.") + .default_value(""); + + gen_grp.add_argument(args.suppress_warnings, "--suppress_warnings") + .help( + "Parses a list of functions for which the warnings will be suppressed on stdout.\n" + "The first element of the list is the name of the output log file with the suppressed warnings.\n" + "The file name and the list of functions is separated by `,`\n" + "Each function in the list is delimited by `:`\n" + "This option should be only used for development purposes.") + .default_value(""); + + gen_grp.add_argument(args.allow_dangling_combinational_nodes, "--allow_dangling_combinational_nodes") + .help( + "Option to allow dangling combinational nodes in the timing graph.\n" + "This option should normally be off, as dangling combinational nodes are unusual\n" + "in the timing graph and may indicate a problem in the circuit or architecture.\n" + "Unless you understand why your architecture/circuit can have valid dangling combinational nodes, this option should be off.\n" + "In general this is a dev-only option and should not be turned on by the end-user.") + .default_value("off") + .show_in(argparse::ShowIn::HELP_ONLY); + auto& file_grp = parser.add_argument_group("file options"); file_grp.add_argument(args.BlifFile, "--circuit_file") @@ -1533,6 +1566,11 @@ static argparse::ArgumentParser create_arg_parser(std::string prog_name, t_optio .default_value("") .show_in(argparse::ShowIn::HELP_ONLY); + route_timing_grp.add_argument(args.disable_check_route, "--disable_check_route") + .help("Disables check_route once routing step has finished or when routing file is loaded") + .default_value("off") + .show_in(argparse::ShowIn::HELP_ONLY); + route_timing_grp.add_argument(args.router_debug_net, "--router_debug_net") .help( "Controls when router debugging is enabled.\n" diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 2227656c1af..fed61143b88 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ 
-50,6 +50,9 @@ struct t_options { argparse::ArgValue clock_modeling; argparse::ArgValue exit_before_pack; argparse::ArgValue strict_checks; + argparse::ArgValue disable_errors; + argparse::ArgValue suppress_warnings; + argparse::ArgValue allow_dangling_combinational_nodes; /* Atom netlist options */ argparse::ArgValue absorb_buffer_luts; @@ -118,6 +121,7 @@ struct t_options { argparse::ArgValue verify_binary_search; argparse::ArgValue RouterAlgorithm; argparse::ArgValue min_incremental_reroute_fanout; + argparse::ArgValue disable_check_route; /* Timing-driven router options only */ argparse::ArgValue astar_fac; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index d08a5764405..a5b64b81498 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -214,6 +214,29 @@ void vpr_init(const int argc, const char** argv, t_options* options, t_vpr_setup /* Determine whether echo is on or off */ setEchoEnabled(options->CreateEchoFile); + /* + * Initialize the functions names for which VPR_THROWs + * are demoted to VTR_LOG_WARNs + */ + for (std::string func_name : vtr::split(options->disable_errors, std::string(":"))) { + map_error_activation_status(func_name); + } + + /* + * Initialize the functions names for which + * warnings are being suppressed + */ + std::vector split_warning_option = vtr::split(options->suppress_warnings, std::string(",")); + + // If the file or the list of functions is not provided + // no warning is suppressed + if (split_warning_option.size() == 2) { + set_noisy_warn_log_file(split_warning_option[0].data()); + for (std::string func_name : vtr::split(split_warning_option[1], std::string(":"))) { + add_warnings_to_suppress(func_name); + } + } + /* Read in arch and circuit */ SetupVPR(options, vpr_setup->TimingEnabled, @@ -275,7 +298,7 @@ void vpr_init(const int argc, const char** argv, t_options* options, t_vpr_setup auto& timing_ctx = g_vpr_ctx.mutable_timing(); { vtr::ScopedStartFinishTimer t("Build Timing Graph"); - timing_ctx.graph = TimingGraphBuilder(atom_ctx.nlist, atom_ctx.lookup).timing_graph(); + timing_ctx.graph = TimingGraphBuilder(atom_ctx.nlist, atom_ctx.lookup).timing_graph(options->allow_dangling_combinational_nodes); VTR_LOG(" Timing Graph Nodes: %zu\n", timing_ctx.graph->nodes().size()); VTR_LOG(" Timing Graph Edges: %zu\n", timing_ctx.graph->edges().size()); VTR_LOG(" Timing Graph Levels: %zu\n", timing_ctx.graph->levels().size()); @@ -637,7 +660,9 @@ RouteStatus vpr_route_flow(t_vpr_setup& vpr_setup, const t_arch& arch) { std::string graphics_msg; if (route_status.success()) { //Sanity check the routing - check_route(router_opts.route_type); + if (!router_opts.disable_check_route) { + check_route(router_opts.route_type); + } get_serial_num(); //Update status diff --git a/vpr/src/base/vpr_context.h b/vpr/src/base/vpr_context.h index 7ddc42ff3be..0ef875206f7 100644 --- a/vpr/src/base/vpr_context.h +++ b/vpr/src/base/vpr_context.h @@ -20,6 +20,7 @@ #include "clock_connection_builders.h" #include "route_traceback.h" #include "place_macro.h" +#include "connection_box.h" //A Context is collection of state relating to a particular part of VPR // @@ -194,6 +195,8 @@ struct DeviceContext : public Context { * Clock Network ********************************************************************/ t_clock_arch* clock_arch; + + ConnectionBoxes connection_boxes; }; //State relating to power analysis diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index a90f3f9f3fd..b3db5d20c60 100644 --- a/vpr/src/base/vpr_types.h +++ 
b/vpr/src/base/vpr_types.h @@ -103,7 +103,10 @@ constexpr const char* EMPTY_BLOCK_NAME = "EMPTY"; enum class e_router_lookahead { CLASSIC, //VPR's classic lookahead (assumes uniform wire types) MAP, //Lookahead considering different wire types (see Oleg Petelin's MASc Thesis) - NO_OP //A no-operation lookahead which always returns zero + NO_OP, //A no-operation lookahead which always returns zero + CONNECTION_BOX_MAP, + // Lookahead considering different wire types and IPIN + // connection box. }; enum class e_route_bb_update { @@ -947,6 +950,7 @@ struct t_router_opts { float reconvergence_cpd_threshold; std::string first_iteration_timing_report_file; bool strict_checks; + bool disable_check_route; }; struct t_analysis_opts { diff --git a/vpr/src/pack/pack.cpp b/vpr/src/pack/pack.cpp index c841f16e453..5627135974d 100644 --- a/vpr/src/pack/pack.cpp +++ b/vpr/src/pack/pack.cpp @@ -279,9 +279,15 @@ static bool try_size_device_grid(const t_arch& arch, const std::mapsecond; + + int num_available_instances = device_ctx.grid.num_instances(type); + for (int itype = 0; itype < type->num_equivalent_tiles; itype++) { + num_available_instances += device_ctx.grid.num_instances(type->equivalent_tiles[itype]); + } + float util = 0.; if (num_instances != 0) { - util = num_instances / device_ctx.grid.num_instances(type); + util = num_instances / num_available_instances; } type_util[type] = util; diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 94ccc1a4788..96de3266233 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -287,7 +287,7 @@ static int try_place_macro(int itype, int ipos, int imacro); static void initial_placement_pl_macros(int macros_max_num_tries, int* free_locations); static void initial_placement_blocks(int* free_locations, enum e_pad_loc_type pad_loc_type); -static void initial_placement_location(const int* free_locations, ClusterBlockId blk_id, int& pipos, t_pl_loc& to); +static void initial_placement_location(const int* free_locations, int itype, int& pipos, t_pl_loc& to); static void initial_placement(enum e_pad_loc_type pad_loc_type, const char* pad_loc_file); @@ -318,6 +318,7 @@ static e_find_affected_blocks_result identify_macro_self_swap_affected_macros(st static e_find_affected_blocks_result record_macro_self_swaps(const int imacro, t_pl_offset swap_offset); bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to); +bool is_legal_blk_swap(t_pl_loc from, t_pl_loc to); std::set determine_locations_emptied_by_move(); @@ -375,7 +376,7 @@ static void comp_td_costs(const PlaceDelayModel& delay_model, double* timing_cos static e_swap_result assess_swap(double delta_c, double t); -static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc& to); +static bool find_to(t_type_ptr to_type, t_type_ptr from_type, float rlim, const t_pl_loc from, t_pl_loc& to); static void get_non_updateable_bb(ClusterNetId net_id, t_bb* bb_coord_new); @@ -431,6 +432,7 @@ static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, static void log_move_abort(std::string reason); static void report_aborted_moves(); +std::vector get_available_tiles(t_type_ptr type); static int grid_to_compressed(const std::vector& coords, int point); static void print_place_status_header(); @@ -963,11 +965,7 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const if (fabs(new_bb_cost - costs->bb_cost) > costs->bb_cost * ERROR_TOL) { std::string msg = vtr::string_fmt("in recompute_costs_from_scratch: new_bb_cost = %g, old 
bb_cost = %g\n", new_bb_cost, costs->bb_cost); - if (placer_opts.strict_checks) { - vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, msg.c_str()); - } else { - VTR_LOG_WARN(msg.c_str()); - } + VPR_THROW(VPR_ERROR_PLACE, msg.c_str()); } costs->bb_cost = new_bb_cost; @@ -977,11 +975,7 @@ static void recompute_costs_from_scratch(const t_placer_opts& placer_opts, const if (fabs(new_timing_cost - costs->timing_cost) > costs->timing_cost * ERROR_TOL) { std::string msg = vtr::string_fmt("in recompute_costs_from_scratch: new_timing_cost = %g, old timing_cost = %g, ERROR_TOL = %g\n", new_timing_cost, costs->timing_cost, ERROR_TOL); - if (placer_opts.strict_checks) { - vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, msg.c_str()); - } else { - VTR_LOG_WARN(msg.c_str()); - } + VPR_THROW(VPR_ERROR_PLACE, msg.c_str()); } costs->timing_cost = new_timing_cost; } else { @@ -1665,18 +1659,59 @@ bool is_legal_swap_to_location(ClusterBlockId blk, t_pl_loc to) { // * on chip, and // * match the correct block type // - //Note that we need to explicitly check that the types match, since the device floorplan is not + //Note that we need to explicitly check that the types match or are equivalent, since the device floorplan is not //(neccessarily) translationally invariant for an arbitrary macro auto& device_ctx = g_vpr_ctx.device(); auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_ctx = g_vpr_ctx.placement(); if (to.x < 0 || to.x >= int(device_ctx.grid.width()) || to.y < 0 || to.y >= int(device_ctx.grid.height()) - || to.z < 0 || to.z >= device_ctx.grid[to.x][to.y].type->capacity - || (device_ctx.grid[to.x][to.y].type != cluster_ctx.clb_nlist.block_type(blk))) { + || to.z < 0 || to.z >= device_ctx.grid[to.x][to.y].type->capacity) { + return false; + } + + // Check if types are allowed to be swapped + auto blk_type_from = cluster_ctx.clb_nlist.block_type(blk); + auto blk_type_to = device_ctx.grid[to.x][to.y].type; + + // Check is to see if `from` type can be placed in `to` type + if (!blk_type_from->is_available_tile_index(blk_type_to->index)) { + return false; + } + + t_pl_loc from = place_ctx.block_locs[blk].loc; + if (!is_legal_blk_swap(from, to)) { + return false; + } + + return true; +} + +bool is_legal_blk_swap(t_pl_loc from, t_pl_loc to) { + // Make sure that when swapping, the block in the `to` location + // can be moved in the `from` location + + auto& device_ctx = g_vpr_ctx.device(); + auto& cluster_ctx = g_vpr_ctx.clustering(); + auto& place_ctx = g_vpr_ctx.placement(); + + ClusterBlockId blk_to = place_ctx.grid_blocks[to.x][to.y].blocks[to.z]; + + // In case `blk_to` is empty we can skip the second check + if (blk_to == EMPTY_BLOCK_ID) { + return true; + } + + auto blk_type_from = device_ctx.grid[from.x][from.y].type; + auto blk_type_to = cluster_ctx.clb_nlist.block_type(blk_to); + + // Check is to see if `to` type can be placed in `from` type + if (!blk_type_to->is_available_tile_index(blk_type_from->index)) { return false; } + return true; } @@ -1736,7 +1771,25 @@ static e_swap_result try_swap(float t, t_pl_loc from = place_ctx.block_locs[b_from].loc; auto cluster_from_type = cluster_ctx.clb_nlist.block_type(b_from); auto grid_from_type = g_vpr_ctx.device().grid[from.x][from.y].type; - VTR_ASSERT(cluster_from_type == grid_from_type); + + VTR_ASSERT(cluster_from_type->is_available_tile_index(grid_from_type->index)); + + t_type_ptr to_block_type = cluster_ctx.clb_nlist.block_type(b_from); + + // Find random equivalent type (could be of the same type as the `from` one) + if 
(to_block_type->num_equivalent_tiles > 0) { + int irand_block_type = std::rand() % (to_block_type->num_equivalent_tiles + 1); + + // If random index is 0 do not use an equivalent tile. + if (irand_block_type > 0) { + auto result = to_block_type->equivalent_tiles.find(irand_block_type - 1); + VTR_ASSERT(result != to_block_type->equivalent_tiles.end()); + + to_block_type = result->second; + } + } + + VTR_ASSERT(cluster_from_type->is_available_tile_index(to_block_type->index)); //Allow some fraction of moves to not be restricted by rlim, //in the hopes of better escaping local minima @@ -1745,7 +1798,7 @@ static e_swap_result try_swap(float t, } t_pl_loc to; - if (!find_to(cluster_ctx.clb_nlist.block_type(b_from), rlim, from, to)) + if (!find_to(to_block_type, grid_from_type, rlim, from, to)) return REJECTED; #if 0 @@ -1829,8 +1882,8 @@ static e_swap_result try_swap(float t, //VTR_ASSERT(check_macro_placement_consistency() == 0); #if 0 - //Check that each accepted swap yields a valid placement - check_place(*costs, delay_model, place_algorithm); + //Check that each accepted swap yields a valid placement + check_place(costs, *place_delay_model, place_algorithm); #endif return (keep_switch); @@ -2003,7 +2056,7 @@ static void update_td_delta_costs(const PlaceDelayModel& delay_model, const Clus } } -static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc& to) { +static bool find_to(t_type_ptr to_type, t_type_ptr from_type, float rlim, const t_pl_loc from, t_pl_loc& to) { //Finds a legal swap to location for the given type, starting from 'x_from' and 'y_from' // //Note that the range limit (rlim) is applied in a logical sense (i.e. 'compressed' grid space consisting @@ -2013,29 +2066,39 @@ static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc& // //This ensures that such blocks don't get locked down too early during placement (as would be the //case with a physical distance rlim) - auto& grid = g_vpr_ctx.device().grid; - - auto grid_type = grid[from.x][from.y].type; - VTR_ASSERT(type == grid_type); //Retrieve the compressed block grid for this block type - const auto& compressed_block_grid = f_compressed_block_grids[type->index]; + const auto& to_compressed_block_grid = f_compressed_block_grids[to_type->index]; + const auto& from_compressed_block_grid = f_compressed_block_grids[from_type->index]; //Determine the rlim in each dimension - int rlim_x = min(compressed_block_grid.compressed_to_grid_x.size(), rlim); - int rlim_y = min(compressed_block_grid.compressed_to_grid_y.size(), rlim); /* for aspect_ratio != 1 case. */ + int rlim_x = min(to_compressed_block_grid.compressed_to_grid_x.size(), rlim); + int rlim_y = min(to_compressed_block_grid.compressed_to_grid_y.size(), rlim); /* for aspect_ratio != 1 case. 
*/ //Determine the coordinates in the compressed grid space of the current block - int cx_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_x, from.x); - int cy_from = grid_to_compressed(compressed_block_grid.compressed_to_grid_y, from.y); + int cx_from = grid_to_compressed(from_compressed_block_grid.compressed_to_grid_x, from.x); + int cy_from = grid_to_compressed(from_compressed_block_grid.compressed_to_grid_y, from.y); - //Determin the valid compressed grid location ranges - int min_cx = std::max(0, cx_from - rlim_x); - int max_cx = std::min(compressed_block_grid.compressed_to_grid_x.size() - 1, cx_from + rlim_x); - int delta_cx = max_cx - min_cx; + int min_cx, max_cx; + int min_cy, max_cy; + int delta_cx; - int min_cy = std::max(0, cy_from - rlim_y); - int max_cy = std::min(compressed_block_grid.compressed_to_grid_y.size() - 1, cy_from + rlim_y); + //Determine the valid compressed grid location ranges + if (to_type == from_type) { + min_cx = std::max(0, cx_from - rlim_x); + max_cx = std::min(to_compressed_block_grid.compressed_to_grid_x.size() - 1, cx_from + rlim_x); + delta_cx = max_cx - min_cx; + + min_cy = std::max(0, cy_from - rlim_y); + max_cy = std::min(to_compressed_block_grid.compressed_to_grid_y.size() - 1, cy_from + rlim_y); + } else { + min_cx = 0; + max_cx = to_compressed_block_grid.compressed_to_grid_x.size() - 1; + delta_cx = max_cx - min_cx; + + min_cy = 0; + max_cy = to_compressed_block_grid.compressed_to_grid_y.size() - 1; + } int cx_to = OPEN; int cy_to = OPEN; @@ -2062,19 +2125,19 @@ static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc& // //The candidates are stored in a flat_map so we can efficiently find the set of valid //candidates with upper/lower bound. - auto y_lower_iter = compressed_block_grid.grid[cx_to].lower_bound(min_cy); - if (y_lower_iter == compressed_block_grid.grid[cx_to].end()) { + auto y_lower_iter = to_compressed_block_grid.grid[cx_to].lower_bound(min_cy); + if (y_lower_iter == to_compressed_block_grid.grid[cx_to].end()) { continue; } - auto y_upper_iter = compressed_block_grid.grid[cx_to].upper_bound(max_cy); + auto y_upper_iter = to_compressed_block_grid.grid[cx_to].upper_bound(max_cy); if (y_lower_iter->first > min_cy) { //No valid blocks at this x location which are within rlim_y // //Fall back to allow the whole y range - y_lower_iter = compressed_block_grid.grid[cx_to].begin(); - y_upper_iter = compressed_block_grid.grid[cx_to].end(); + y_lower_iter = to_compressed_block_grid.grid[cx_to].begin(); + y_upper_iter = to_compressed_block_grid.grid[cx_to].end(); min_cy = y_lower_iter->first; max_cy = (y_upper_iter - 1)->first; @@ -2120,19 +2183,19 @@ static bool find_to(t_type_ptr type, float rlim, const t_pl_loc from, t_pl_loc& VTR_ASSERT(cy_to != OPEN); //Convert to true (uncompressed) grid locations - to.x = compressed_block_grid.compressed_to_grid_x[cx_to]; - to.y = compressed_block_grid.compressed_to_grid_y[cy_to]; + to.x = to_compressed_block_grid.compressed_to_grid_x[cx_to]; + to.y = to_compressed_block_grid.compressed_to_grid_y[cy_to]; //Each x/y location contains only a single type, so we can pick a random //z (capcity) location - to.z = vtr::irand(type->capacity - 1); + to.z = vtr::irand(to_type->capacity - 1); auto& device_ctx = g_vpr_ctx.device(); - VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].type == type, "Type must match"); + VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].type == to_type, "Type must match"); VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].width_offset == 0, "Should be at block base 
location"); VTR_ASSERT_MSG(device_ctx.grid[to.x][to.y].height_offset == 0, "Should be at block base location"); - return true; + return is_legal_blk_swap(from, to); } static e_swap_result assess_swap(double delta_c, double t) { @@ -3161,11 +3224,10 @@ static int try_place_macro(int itype, int ipos, int imacro) { static void initial_placement_pl_macros(int macros_max_num_tries, int* free_locations) { int macro_placed; - int itype, itry, ipos; + int itry, ipos; ClusterBlockId blk_id; auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& device_ctx = g_vpr_ctx.device(); auto& place_ctx = g_vpr_ctx.placement(); auto& pl_macros = place_ctx.pl_macros; @@ -3177,49 +3239,62 @@ static void initial_placement_pl_macros(int macros_max_num_tries, int* free_loca // Assume that all the blocks in the macro are of the same type blk_id = pl_macros[imacro].members[0].blk_index; - itype = cluster_ctx.clb_nlist.block_type(blk_id)->index; - if (free_locations[itype] < int(pl_macros[imacro].members.size())) { - vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, - "Initial placement failed.\n" - "Could not place macro length %zu with head block %s (#%zu); not enough free locations of type %s (#%d).\n" - "VPR cannot auto-size for your circuit, please resize the FPGA manually.\n", - pl_macros[imacro].members.size(), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id), device_ctx.block_types[itype].name, itype); - } - - // Try to place the macro first, if can be placed - place them, otherwise try again - for (itry = 0; itry < macros_max_num_tries && macro_placed == false; itry++) { - // Choose a random position for the head - ipos = vtr::irand(free_locations[itype] - 1); - // Try to place the macro - macro_placed = try_place_macro(itype, ipos, imacro); + bool no_free_locations = true; + // Loop over all the possible equivalent tiles + for (int itype : get_available_tiles(cluster_ctx.clb_nlist.block_type(blk_id))) { + if (free_locations[itype] >= int(pl_macros[imacro].members.size())) { + no_free_locations = false; + } else { + continue; + } - } // Finished all tries + // Try to place the macro first, if can be placed - place them, otherwise try again + for (itry = 0; itry < macros_max_num_tries && macro_placed == false; itry++) { + // Choose a random position for the head + ipos = vtr::irand(free_locations[itype] - 1); - if (macro_placed == false) { - // if a macro still could not be placed after macros_max_num_tries times, - // go through the chip exhaustively to find a legal placement for the macro - // place the macro on the first location that is legal - // then set macro_placed = true; - // if there are no legal positions, error out - - // Exhaustive placement of carry macros - for (ipos = 0; ipos < free_locations[itype] && macro_placed == false; ipos++) { // Try to place the macro macro_placed = try_place_macro(itype, ipos, imacro); - } // Exhausted all the legal placement position for this macro + } // Finished all tries - // If macro could not be placed after exhaustive placement, error out if (macro_placed == false) { - // Error out - vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, - "Initial placement failed.\n" - "Could not place macro length %zu with head block %s (#%zu); not enough free locations of type %s (#%d).\n" - "Please manually size the FPGA because VPR can't do this yet.\n", - pl_macros[imacro].members.size(), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id), device_ctx.block_types[itype].name, itype); + // if a macro still could not be placed after macros_max_num_tries 
times, + // go through the chip exhaustively to find a legal placement for the macro + // place the macro on the first location that is legal + // then set macro_placed = true; + // if there are no legal positions, error out + + // Exhaustive placement of carry macros + for (ipos = 0; ipos < free_locations[itype] && macro_placed == false; ipos++) { + // Try to place the macro + macro_placed = try_place_macro(itype, ipos, imacro); + + } // Exhausted all the legal placement position for this macro } + if (macro_placed == true) { + break; + } + } + + if (no_free_locations) { + vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, + "Initial placement failed.\n" + "Could not place macro length %d with head block %s (#%zu); not enough free locations.\n" + "VPR cannot auto-size for your circuit, please resize the FPGA manually.\n", + pl_macros[imacro].members.size(), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id)); + } + + // If macro could not be placed even after exhaustive placement, error out + if (macro_placed == false) { + // Error out + vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, + "Initial placement failed.\n" + "Could not place macro length %d with head block %s (#%zu); not enough free locations.\n" + "Please manually size the FPGA because VPR can't do this yet.\n", + pl_macros[imacro].members.size(), cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id)); } else { // This macro has been placed successfully, proceed to place the next macro continue; @@ -3230,10 +3305,9 @@ static void initial_placement_pl_macros(int macros_max_num_tries, int* free_loca /* Place blocks that are NOT a part of any macro. * We'll randomly place each block in the clustered netlist, one by one. */ static void initial_placement_blocks(int* free_locations, enum e_pad_loc_type pad_loc_type) { - int itype, ipos; + int ipos; auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.mutable_placement(); - auto& device_ctx = g_vpr_ctx.device(); for (auto blk_id : cluster_ctx.clb_nlist.blocks()) { if (place_ctx.block_locs[blk_id].loc.x != -1) { // -1 is a sentinel for an empty block @@ -3249,45 +3323,54 @@ static void initial_placement_blocks(int* free_locations, enum e_pad_loc_type pa * Choose one randomly and put blk_id there. Then we don't want to pick * that location again, so remove it from the free_locations array. 
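/* A minimal illustrative sketch, separate from the diff above, of the macro placement
 * strategy it implements: for each candidate tile type, try a bounded number of random
 * positions, then fall back to an exhaustive scan. The names candidate_types, num_free
 * and try_place are hypothetical stand-ins, not VPR APIs. */
#include <cstdlib>
#include <functional>
#include <vector>

bool place_with_retries(const std::vector<int>& candidate_types,
                        const std::vector<int>& num_free,
                        int max_random_tries,
                        const std::function<bool(int /*type*/, int /*pos*/)>& try_place) {
    for (int type : candidate_types) {
        if (num_free[type] <= 0) continue; // no legal positions of this tile type

        // Phase 1: a bounded number of random attempts.
        for (int itry = 0; itry < max_random_tries; ++itry) {
            int pos = std::rand() % num_free[type];
            if (try_place(type, pos)) return true;
        }
        // Phase 2: exhaustive scan over every legal position of this type.
        for (int pos = 0; pos < num_free[type]; ++pos) {
            if (try_place(type, pos)) return true;
        }
    }
    return false; // caller reports "not enough free locations"
}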
*/ - itype = cluster_ctx.clb_nlist.block_type(blk_id)->index; - if (free_locations[itype] <= 0) { - vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, - "Initial placement failed.\n" - "Could not place block %s (#%zu); no free locations of type %s (#%d).\n", - cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id), device_ctx.block_types[itype].name, itype); - } + bool no_free_locations = true; + // Loop over all the possible equivalent tiles + for (int itype : get_available_tiles(cluster_ctx.clb_nlist.block_type(blk_id))) { + if (free_locations[itype] > 0) { + no_free_locations = false; + } else { + continue; + } + + t_pl_loc to; + initial_placement_location(free_locations, itype, ipos, to); + + // Make sure that the position is EMPTY_BLOCK before placing the block down + VTR_ASSERT(place_ctx.grid_blocks[to.x][to.y].blocks[to.z] == EMPTY_BLOCK_ID); - t_pl_loc to; - initial_placement_location(free_locations, blk_id, ipos, to); + place_ctx.grid_blocks[to.x][to.y].blocks[to.z] = blk_id; + place_ctx.grid_blocks[to.x][to.y].usage++; - // Make sure that the position is EMPTY_BLOCK before placing the block down - VTR_ASSERT(place_ctx.grid_blocks[to.x][to.y].blocks[to.z] == EMPTY_BLOCK_ID); + place_ctx.block_locs[blk_id].loc = to; - place_ctx.grid_blocks[to.x][to.y].blocks[to.z] = blk_id; - place_ctx.grid_blocks[to.x][to.y].usage++; + //Mark IOs as fixed if specifying a (fixed) random placement + if (is_io_type(cluster_ctx.clb_nlist.block_type(blk_id)) && pad_loc_type == RANDOM) { + place_ctx.block_locs[blk_id].is_fixed = true; + } - place_ctx.block_locs[blk_id].loc = to; + /* Ensure randomizer doesn't pick this location again, since it's occupied. Could shift all the + * legal positions in legal_pos to remove the entry (choice) we just used, but faster to + * just move the last entry in legal_pos to the spot we just used and decrement the + * count of free_locations. */ + legal_pos[itype][ipos] = legal_pos[itype][free_locations[itype] - 1]; /* overwrite used block position */ + free_locations[itype]--; - //Mark IOs as fixed if specifying a (fixed) random placement - if (is_io_type(cluster_ctx.clb_nlist.block_type(blk_id)) && pad_loc_type == RANDOM) { - place_ctx.block_locs[blk_id].is_fixed = true; + //Do not check other type as the block has already been placed + break; } - /* Ensure randomizer doesn't pick this location again, since it's occupied. Could shift all the - * legal positions in legal_pos to remove the entry (choice) we just used, but faster to - * just move the last entry in legal_pos to the spot we just used and decrement the - * count of free_locations. 
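/* A small sketch, separate from the diff above, of the O(1) "overwrite with the last
 * entry" trick the comment describes: retire a consumed legal position without shifting
 * the rest of legal_pos. Types are simplified stand-ins. */
#include <vector>

struct Loc { int x, y, z; };

Loc take_random_free_location(std::vector<Loc>& legal_pos, int& free_count, int ipos) {
    Loc chosen = legal_pos[ipos];
    // Move the last still-free entry into the slot we just used, then shrink the free
    // region by one, so the randomizer can no longer pick the consumed location.
    legal_pos[ipos] = legal_pos[free_count - 1];
    --free_count;
    return chosen;
}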
*/ - legal_pos[itype][ipos] = legal_pos[itype][free_locations[itype] - 1]; /* overwrite used block position */ - free_locations[itype]--; + // Check if there were no available locations + if (no_free_locations) { + vpr_throw(VPR_ERROR_PLACE, __FILE__, __LINE__, + "Initial placement failed.\n" + "Could not place block %s (#%zu); no free locations\n", + cluster_ctx.clb_nlist.block_name(blk_id).c_str(), size_t(blk_id)); + } } } } -static void initial_placement_location(const int* free_locations, ClusterBlockId blk_id, int& ipos, t_pl_loc& to) { - auto& cluster_ctx = g_vpr_ctx.clustering(); - - int itype = cluster_ctx.clb_nlist.block_type(blk_id)->index; - +static void initial_placement_location(const int* free_locations, int itype, int& ipos, t_pl_loc& to) { ipos = vtr::irand(free_locations[itype] - 1); to = legal_pos[itype][ipos]; } @@ -3544,7 +3627,7 @@ static int check_block_placement_consistency() { if (EMPTY_BLOCK_ID == bnum || INVALID_BLOCK_ID == bnum) continue; - if (cluster_ctx.clb_nlist.block_type(bnum) != device_ctx.grid[i][j].type) { + if (!cluster_ctx.clb_nlist.block_type(bnum)->is_available_tile_index(device_ctx.grid[i][j].type->index)) { VTR_LOG_ERROR("Block %zu type (%s) does not match grid location (%zu,%zu) type (%s).\n", size_t(bnum), cluster_ctx.clb_nlist.block_type(bnum)->name, i, j, device_ctx.grid[i][j].type->name); error++; @@ -3665,6 +3748,19 @@ static void generate_post_place_timing_reports(const t_placer_opts& placer_opts, timing_reporter.report_timing_setup(placer_opts.post_place_timing_report_file, *timing_info.setup_analyzer(), analysis_opts.timing_report_npaths); } +std::vector get_available_tiles(t_type_ptr type) { + std::vector types(1, type->index); + + for (int i = 0; i < type->num_equivalent_tiles; i++) { + auto result = type->equivalent_tiles.find(i); + VTR_ASSERT(result != type->equivalent_tiles.end()); + + types.push_back(result->second->index); + } + + return types; +} + #if 0 static void update_screen_debug(); diff --git a/vpr/src/place/timing_place_lookup.cpp b/vpr/src/place/timing_place_lookup.cpp index e6e0e1dccda..60c13a079e1 100644 --- a/vpr/src/place/timing_place_lookup.cpp +++ b/vpr/src/place/timing_place_lookup.cpp @@ -264,9 +264,14 @@ static float route_connection_delay(int source_x, int source_y, int sink_x, int VTR_ASSERT(sink_rr_node != OPEN); - successfully_routed = calculate_delay(source_rr_node, sink_rr_node, - router_opts, - &net_delay_value); + { + vtr::ScopedStartFinishTimer timer(vtr::string_fmt( + "Routing Src: %d Sink: %d", source_rr_node, + sink_rr_node)); + successfully_routed = calculate_delay(source_rr_node, sink_rr_node, + router_opts, + &net_delay_value); + } if (successfully_routed) break; } diff --git a/vpr/src/route/check_route.cpp b/vpr/src/route/check_route.cpp index 84b6290a144..dbef691c861 100644 --- a/vpr/src/route/check_route.cpp +++ b/vpr/src/route/check_route.cpp @@ -118,7 +118,7 @@ void check_route(enum e_route_type route_type) { } else { //Continuing along existing branch connects = check_adjacent(prev_node, inode); if (!connects) { - vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__, + VPR_THROW(VPR_ERROR_ROUTE, "in check_route: found non-adjacent segments in traceback while checking net %d:\n" " %s\n" " %s\n", diff --git a/vpr/src/route/check_rr_graph.cpp b/vpr/src/route/check_rr_graph.cpp index 3699746fda5..846680ab69e 100644 --- a/vpr/src/route/check_rr_graph.cpp +++ b/vpr/src/route/check_rr_graph.cpp @@ -502,7 +502,7 @@ static void check_unbuffered_edges(int from_node) { } if (trans_matched == false) { - 
vpr_throw(VPR_ERROR_ROUTE, __FILE__, __LINE__, + VPR_THROW(VPR_ERROR_ROUTE, "in check_unbuffered_edges:\n" "connection from node %d to node %d uses an unbuffered switch (switch type %d '%s')\n" "but there is no corresponding unbuffered switch edge in the other direction.\n", diff --git a/vpr/src/route/connection_box.cpp b/vpr/src/route/connection_box.cpp new file mode 100644 index 00000000000..85d554b4307 --- /dev/null +++ b/vpr/src/route/connection_box.cpp @@ -0,0 +1,127 @@ +#include "connection_box.h" +#include "vtr_assert.h" +#include "globals.h" + +ConnectionBoxes::ConnectionBoxes() + : size_(std::make_pair(0, 0)) { +} + +size_t ConnectionBoxes::num_connection_box_types() const { + return boxes_.size(); +} + +std::pair ConnectionBoxes::connection_box_grid_size() const { + return size_; +} + +const ConnectionBox* ConnectionBoxes::get_connection_box(ConnectionBoxId box) const { + if (bool(box)) { + return nullptr; + } + + size_t index = size_t(box); + if (index >= boxes_.size()) { + return nullptr; + } + + return &boxes_.at(index); +} + +bool ConnectionBoxes::find_connection_box(int inode, + ConnectionBoxId* box_id, + std::pair* box_location) const { + VTR_ASSERT(box_id != nullptr); + VTR_ASSERT(box_location != nullptr); + + const auto& conn_box_loc = ipin_map_[inode]; + if (conn_box_loc.box_id == ConnectionBoxId::INVALID()) { + return false; + } + + *box_id = conn_box_loc.box_id; + *box_location = conn_box_loc.box_location; + return true; +} + +// Clear IPIN map and set connection box grid size and box ids. +void ConnectionBoxes::reset_boxes(std::pair size, + const std::vector boxes) { + clear(); + + size_ = size; + boxes_ = boxes; +} + +void ConnectionBoxes::resize_nodes(size_t rr_node_size) { + ipin_map_.resize(rr_node_size); + canonical_loc_map_.resize(rr_node_size, + std::make_pair(-1, -1)); +} + +void ConnectionBoxes::clear() { + ipin_map_.clear(); + size_ = std::make_pair(0, 0); + boxes_.clear(); + canonical_loc_map_.clear(); + sink_to_ipin_.clear(); +} + +void ConnectionBoxes::add_connection_box(int inode, ConnectionBoxId box_id, std::pair box_location) { + // Ensure that box location is in bounds + VTR_ASSERT(box_location.first < size_.first); + VTR_ASSERT(box_location.second < size_.second); + + // Bounds check box_id + VTR_ASSERT(bool(box_id)); + VTR_ASSERT(size_t(box_id) < boxes_.size()); + + // Make sure sink map will not be invalidated upon insertion. 
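/* An illustrative sketch, separate from the diff above, of the per-node lookup that
 * find_connection_box() performs: a flat table indexed by rr node id, with an invalid
 * sentinel for untagged nodes. BoxLoc and INVALID_BOX are hypothetical stand-ins for
 * ConnBoxLoc and ConnectionBoxId::INVALID(). */
#include <cstddef>
#include <utility>
#include <vector>

constexpr int INVALID_BOX = -1;

struct BoxLoc {
    std::pair<std::size_t, std::size_t> location{0, 0};
    int box_id = INVALID_BOX;
};

// Returns true and fills the outputs only when the node was tagged with a box.
bool lookup_connection_box(const std::vector<BoxLoc>& ipin_map, int inode,
                           int* box_id, std::pair<std::size_t, std::size_t>* loc) {
    const BoxLoc& entry = ipin_map[inode];
    if (entry.box_id == INVALID_BOX) return false;
    *box_id = entry.box_id;
    *loc = entry.location;
    return true;
}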
+ VTR_ASSERT(sink_to_ipin_.size() == 0); + + ipin_map_[inode] = ConnBoxLoc(box_location, box_id); +} + +void ConnectionBoxes::add_canonical_loc(int inode, std::pair loc) { + VTR_ASSERT(loc.first < size_.first); + VTR_ASSERT(loc.second < size_.second); + canonical_loc_map_[inode] = loc; +} + +const std::pair* ConnectionBoxes::find_canonical_loc(int inode) const { + const auto& canon_loc = canonical_loc_map_[inode]; + if (canon_loc.first == size_t(-1)) { + return nullptr; + } + + return &canon_loc; +} + +void ConnectionBoxes::create_sink_back_ref() { + const auto& device_ctx = g_vpr_ctx.device(); + + sink_to_ipin_.resize(device_ctx.rr_nodes.size(), {{0, 0, 0, 0}, 0}); + + for (size_t i = 0; i < device_ctx.rr_nodes.size(); ++i) { + const auto& ipin_node = device_ctx.rr_nodes[i]; + if (ipin_node.type() != IPIN) { + continue; + } + + if (ipin_map_[i].box_id == ConnectionBoxId::INVALID()) { + continue; + } + + for (auto edge : ipin_node.edges()) { + int sink_inode = ipin_node.edge_sink_node(edge); + VTR_ASSERT(device_ctx.rr_nodes[sink_inode].type() == SINK); + VTR_ASSERT(sink_to_ipin_[sink_inode].ipin_count < 4); + auto& sink_to_ipin = sink_to_ipin_[sink_inode]; + sink_to_ipin.ipin_nodes[sink_to_ipin.ipin_count++] = i; + } + } +} + +const SinkToIpin& ConnectionBoxes::find_sink_connection_boxes( + int inode) const { + return sink_to_ipin_[inode]; +} diff --git a/vpr/src/route/connection_box.h b/vpr/src/route/connection_box.h new file mode 100644 index 00000000000..06217ac2a41 --- /dev/null +++ b/vpr/src/route/connection_box.h @@ -0,0 +1,76 @@ +#ifndef CONNECTION_BOX_H +#define CONNECTION_BOX_H +// Some routing graphs have connectivity driven by types of connection boxes. +// This class relates IPIN rr nodes with connection box type and locations, used +// for connection box driven map lookahead. + +#include +#include "vtr_strong_id.h" +#include "vtr_flat_map.h" +#include "vtr_range.h" +#include + +struct connection_box_tag {}; +typedef vtr::StrongId ConnectionBoxId; + +struct ConnectionBox { + std::string name; +}; + +struct ConnBoxLoc { + ConnBoxLoc() + : box_location(std::make_pair(-1, -1)) {} + ConnBoxLoc( + const std::pair& a_box_location, + ConnectionBoxId a_box_id) + : box_location(a_box_location) + , box_id(a_box_id) {} + + std::pair box_location; + ConnectionBoxId box_id; +}; + +struct SinkToIpin { + int ipin_nodes[4]; + int ipin_count; +}; + +class ConnectionBoxes { + public: + ConnectionBoxes(); + + size_t num_connection_box_types() const; + std::pair connection_box_grid_size() const; + const ConnectionBox* get_connection_box(ConnectionBoxId box) const; + + bool find_connection_box(int inode, + ConnectionBoxId* box_id, + std::pair* box_location) const; + const std::pair* find_canonical_loc(int inode) const; + + // Clear IPIN map and set connection box grid size and box ids. + void clear(); + void reset_boxes(std::pair size, + const std::vector boxes); + void resize_nodes(size_t rr_node_size); + + void add_connection_box(int inode, ConnectionBoxId box_id, std::pair box_location); + void add_canonical_loc(int inode, std::pair loc); + + // Create map from SINK's back to IPIN's + // + // This must be called after all connection boxes have been added. 
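/* An illustrative sketch, separate from the diff above, of the back-reference pass in
 * create_sink_back_ref(): walk every IPIN's fanout edges and record the IPIN on each
 * SINK it reaches. The Node type here is a simplified stand-in for device_ctx.rr_nodes. */
#include <cstddef>
#include <vector>

enum class NodeType { IPIN, SINK, OTHER };

struct Node {
    NodeType type;
    std::vector<int> fanout; // indices of nodes reached by outgoing edges
};

std::vector<std::vector<int>> build_sink_to_ipin(const std::vector<Node>& nodes) {
    std::vector<std::vector<int>> sink_to_ipin(nodes.size());
    for (std::size_t i = 0; i < nodes.size(); ++i) {
        if (nodes[i].type != NodeType::IPIN) continue;
        for (int sink : nodes[i].fanout) {
            sink_to_ipin[sink].push_back(static_cast<int>(i)); // SINK -> IPIN back reference
        }
    }
    return sink_to_ipin;
}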
+ void create_sink_back_ref(); + const SinkToIpin& find_sink_connection_boxes( + int inode) const; + + private: + std::pair size_; + std::vector boxes_; + std::vector ipin_map_; + std::vector sink_to_ipin_; + std::vector> + canonical_loc_map_; +}; + +#endif diff --git a/vpr/src/route/connection_box_lookahead_map.cpp b/vpr/src/route/connection_box_lookahead_map.cpp new file mode 100644 index 00000000000..fc806f67687 --- /dev/null +++ b/vpr/src/route/connection_box_lookahead_map.cpp @@ -0,0 +1,460 @@ +#include "connection_box_lookahead_map.h" + +#include +#include + +#include "connection_box.h" +#include "rr_node.h" +#include "router_lookahead_map_utils.h" +#include "globals.h" +#include "vtr_math.h" +#include "vtr_time.h" +#include "echo_files.h" + +/* we're profiling routing cost over many tracks for each wire type, so we'll + * have many cost entries at each |dx|,|dy| offset. There are many ways to + * "boil down" the many costs at each offset to a single entry for a given + * (wire type, chan_type) combination we can take the smallest cost, the + * average, median, etc. This define selects the method we use. + * + * See e_representative_entry_method */ +#define REPRESENTATIVE_ENTRY_METHOD SMALLEST + +#define REF_X 25 +#define REF_Y 23 + +static int signum(int x) { + if (x > 0) return 1; + if (x < 0) + return -1; + else + return 0; +} + +typedef std::vector, Cost_Entry>> t_routing_cost_map; +static void run_dijkstra(int start_node_ind, + t_routing_cost_map* cost_map); + +class CostMap { + public: + void set_segment_count(size_t seg_count) { + cost_map_.clear(); + offset_.clear(); + cost_map_.resize(seg_count); + offset_.resize(seg_count); + + const auto& device_ctx = g_vpr_ctx.device(); + segment_map_.resize(device_ctx.rr_nodes.size()); + for (size_t i = 0; i < segment_map_.size(); ++i) { + auto& from_node = device_ctx.rr_nodes[i]; + + int from_cost_index = from_node.cost_index(); + int from_seg_index = device_ctx.rr_indexed_data[from_cost_index].seg_index; + + segment_map_[i] = from_seg_index; + } + } + + int node_to_segment(int from_node_ind) { + return segment_map_[from_node_ind]; + } + + Cost_Entry find_cost(int from_seg_index, int delta_x, int delta_y) const { + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); + int dx = delta_x - offset_[from_seg_index].first; + int dy = delta_y - offset_[from_seg_index].second; + const auto& cost_map = cost_map_[from_seg_index]; + + if (dx < 0) { + dx = 0; + } + if (dy < 0) { + dy = 0; + } + + if (dx >= (ssize_t)cost_map.dim_size(0)) { + dx = cost_map.dim_size(0) - 1; + } + if (dy >= (ssize_t)cost_map.dim_size(1)) { + dy = cost_map.dim_size(1) - 1; + } + + return cost_map_[from_seg_index][dx][dy]; + } + + void set_cost_map(int from_seg_index, + const t_routing_cost_map& cost_map, + e_representative_entry_method method) { + VTR_ASSERT(from_seg_index >= 0 && from_seg_index < (ssize_t)offset_.size()); + + // Find coordinate offset for this segment. 
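/* An illustrative sketch, separate from the diff above, of the clamped lookup in
 * CostMap::find_cost(): translate a (dx, dy) offset into matrix coordinates using the
 * per-segment offset, then clamp to the sampled range so out-of-range queries reuse the
 * nearest profiled entry. Matrix2D is a stand-in for vtr::NdMatrix<Cost_Entry, 2>. */
#include <algorithm>
#include <vector>

struct Entry { float delay; float congestion; };

struct Matrix2D {
    int dim_x, dim_y;
    std::vector<Entry> data;
    const Entry& at(int x, int y) const { return data[x * dim_y + y]; }
};

Entry clamped_lookup(const Matrix2D& m, int offset_x, int offset_y, int delta_x, int delta_y) {
    int dx = std::clamp(delta_x - offset_x, 0, m.dim_x - 1);
    int dy = std::clamp(delta_y - offset_y, 0, m.dim_y - 1);
    return m.at(dx, dy);
}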
+ int min_dx = 0; + int min_dy = 0; + int max_dx = 0; + int max_dy = 0; + for (const auto& entry : cost_map) { + min_dx = std::min(entry.first.first, min_dx); + min_dy = std::min(entry.first.second, min_dy); + + max_dx = std::max(entry.first.first, max_dx); + max_dy = std::max(entry.first.second, max_dy); + } + + offset_[from_seg_index].first = min_dx; + offset_[from_seg_index].second = min_dy; + size_t dim_x = max_dx - min_dx + 1; + size_t dim_y = max_dy - min_dy + 1; + + vtr::NdMatrix expansion_cost_map( + {dim_x, dim_y}); + + for (const auto& entry : cost_map) { + int x = entry.first.first - min_dx; + int y = entry.first.second - min_dy; + expansion_cost_map[x][y].add_cost_entry( + method, entry.second.delay, + entry.second.congestion); + } + + cost_map_[from_seg_index] = vtr::NdMatrix( + {dim_x, dim_y}); + + /* set the lookahead cost map entries with a representative cost + * entry from routing_cost_map */ + for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { + for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { + cost_map_[from_seg_index][ix][iy] = expansion_cost_map[ix][iy].get_representative_cost_entry(method); + } + } + + /* find missing cost entries and fill them in by copying a nearby cost entry */ + for (unsigned ix = 0; ix < expansion_cost_map.dim_size(0); ix++) { + for (unsigned iy = 0; iy < expansion_cost_map.dim_size(1); iy++) { + Cost_Entry cost_entry = cost_map_[from_seg_index][ix][iy]; + + if (!cost_entry.valid()) { + Cost_Entry copied_entry = get_nearby_cost_entry( + from_seg_index, + offset_[from_seg_index].first + ix, + offset_[from_seg_index].second + iy); + cost_map_[from_seg_index][ix][iy] = copied_entry; + } + } + } + } + + Cost_Entry get_nearby_cost_entry(int segment_index, int x, int y) { + /* compute the slope from x,y to 0,0 and then move towards 0,0 by one + * unit to get the coordinates of the cost entry to be copied */ + + float slope; + int copy_x, copy_y; + if (x == 0 || y == 0) { + slope = std::numeric_limits::infinity(); + copy_x = x - signum(x); + copy_y = y - signum(y); + } else { + slope = (float)y / (float)x; + if (slope >= 1.0) { + copy_y = y - signum(y); + copy_x = vtr::nint((float)y / slope); + } else { + copy_x = x - signum(x); + copy_y = vtr::nint((float)x * slope); + } + } + + Cost_Entry copy_entry = find_cost(segment_index, copy_x, copy_y); + + /* if the entry to be copied is also empty, recurse */ + if (copy_entry.valid()) { + return copy_entry; + } else if (copy_x == 0 && copy_y == 0) { + return Cost_Entry(); + } + + return get_nearby_cost_entry(segment_index, copy_x, copy_y); + } + + void print_cost_map(const std::vector& segment_inf, + const char* fname) { + FILE* fp = vtr::fopen(fname, "w"); + for (size_t iseg = 0; iseg < cost_map_.size(); iseg++) { + fprintf(fp, "Seg %s(%zu) (%d, %d)\n", segment_inf.at(iseg).name.c_str(), + iseg, + offset_[iseg].first, + offset_[iseg].second); + for (size_t iy = 0; iy < cost_map_[iseg].dim_size(1); iy++) { + for (size_t ix = 0; ix < cost_map_[iseg].dim_size(0); ix++) { + fprintf(fp, "%.4g,\t", + cost_map_[iseg][ix][iy].delay); + } + fprintf(fp, "\n"); + } + fprintf(fp, "\n\n"); + } + + fclose(fp); + } + + private: + std::vector> cost_map_; + std::vector> offset_; + std::vector segment_map_; +}; + +static CostMap g_cost_map; + +class StartNode { + public: + StartNode(int start_x, int start_y, t_rr_type rr_type, int seg_index) + : start_x_(start_x) + , start_y_(start_y) + , rr_type_(rr_type) + , seg_index_(seg_index) + , index_(0) {} + int get_next_node() { + const auto& 
device_ctx = g_vpr_ctx.device(); + const std::vector& channel_node_list = device_ctx.rr_node_indices[rr_type_][start_x_][start_y_][0]; + + for (; index_ < channel_node_list.size(); index_++) { + int node_ind = channel_node_list[index_]; + + if (node_ind == OPEN || device_ctx.rr_nodes[node_ind].capacity() == 0) { + continue; + } + + const std::pair* loc = device_ctx.connection_boxes.find_canonical_loc(node_ind); + if (loc == nullptr) { + continue; + } + + int node_cost_ind = device_ctx.rr_nodes[node_ind].cost_index(); + int node_seg_ind = device_ctx.rr_indexed_data[node_cost_ind].seg_index; + if (node_seg_ind == seg_index_) { + index_ += 1; + return node_ind; + } + } + + return UNDEFINED; + } + + private: + int start_x_; + int start_y_; + t_rr_type rr_type_; + int seg_index_; + size_t index_; +}; + +// Minimum size of search for channels to profile. kMinProfile results +// in searching x = [0, kMinProfile], and y = [0, kMinProfile[. +// +// Making this value larger will increase the sample size, but also the runtime +// to produce the lookahead. +static constexpr int kMinProfile = 1; + +// Maximum size of search for channels to profile. Once search is outside of +// kMinProfile distance, lookahead will stop searching once: +// - At least one channel has been profiled +// - kMaxProfile is exceeded. +static constexpr int kMaxProfile = 7; + +void compute_connection_box_lookahead( + const std::vector& segment_inf) { + size_t num_segments = segment_inf.size(); + vtr::ScopedStartFinishTimer timer("Computing connection box lookahead map"); + + /* free previous delay map and allocate new one */ + g_cost_map.set_segment_count(segment_inf.size()); + + /* run Dijkstra's algorithm for each segment type & channel type combination */ + for (int iseg = 0; iseg < (ssize_t)num_segments; iseg++) { + VTR_LOG("Creating cost map for %s(%d)\n", + segment_inf[iseg].name.c_str(), iseg); + /* allocate the cost map for this iseg/chan_type */ + t_routing_cost_map cost_map; + + int count = 0; + + int dx = 0; + int dy = 0; + //int start_x = vtr::nint(device_ctx.grid.width()/2); + //int start_y = vtr::nint(device_ctx.grid.height()/2); + int start_x = REF_X; + int start_y = REF_Y; + while ((count == 0 && dx < kMaxProfile) || dy <= kMinProfile) { + for (e_rr_type chan_type : {CHANX, CHANY}) { + StartNode start_node(start_x + dx, start_y + dy, chan_type, iseg); + + for (int start_node_ind = start_node.get_next_node(); + start_node_ind != UNDEFINED; + start_node_ind = start_node.get_next_node()) { + count += 1; + + /* run Dijkstra's algorithm */ + run_dijkstra(start_node_ind, &cost_map); + } + } + + if (dy < dx) { + dy += 1; + } else { + dx += 1; + } + } + + if (count == 0) { + VTR_LOG_WARN("Segment %s(%d) found no start_node_ind\n", + segment_inf[iseg].name.c_str(), iseg); + } + + /* boil down the cost list in routing_cost_map at each coordinate to a + * representative cost entry and store it in the lookahead cost map */ + g_cost_map.set_cost_map(iseg, cost_map, + REPRESENTATIVE_ENTRY_METHOD); + } + + if (getEchoEnabled() && isEchoFileEnabled(E_ECHO_LOOKAHEAD_MAP)) { + g_cost_map.print_cost_map(segment_inf, getEchoFileName(E_ECHO_LOOKAHEAD_MAP)); + } +} + +float get_connection_box_lookahead_map_cost(int from_node_ind, + int to_node_ind, + float criticality_fac) { + if (from_node_ind == to_node_ind) { + return 0.f; + } + + auto& device_ctx = g_vpr_ctx.device(); + + std::pair from_location; + std::pair to_location; + auto to_node_type = device_ctx.rr_nodes[to_node_ind].type(); + + if (to_node_type == SINK) { + const auto& 
sink_to_ipin = device_ctx.connection_boxes.find_sink_connection_boxes(to_node_ind); + if (sink_to_ipin.ipin_count > 1) { + float cost = std::numeric_limits::infinity(); + // Find cheapest cost from from_node_ind to IPINs for this SINK. + for (int i = 0; i < sink_to_ipin.ipin_count; ++i) { + cost = std::min(cost, + get_connection_box_lookahead_map_cost( + from_node_ind, + sink_to_ipin.ipin_nodes[i], criticality_fac)); + } + + return cost; + } else if (sink_to_ipin.ipin_count == 1) { + to_node_ind = sink_to_ipin.ipin_nodes[0]; + if (from_node_ind == to_node_ind) { + return 0.f; + } + } else { + return std::numeric_limits::infinity(); + } + } + + if (device_ctx.rr_nodes[to_node_ind].type() == IPIN) { + ConnectionBoxId box_id; + std::pair box_location; + bool found = device_ctx.connection_boxes.find_connection_box( + to_node_ind, &box_id, &box_location); + if (!found) { + VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", to_node_ind); + } + + to_location = box_location; + } else { + const std::pair* to_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(to_node_ind); + if (!to_canonical_loc) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d", to_node_ind); + } + + to_location = *to_canonical_loc; + } + + const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(from_node_ind); + if (from_canonical_loc == nullptr) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical loc for %d (to %d)", + from_node_ind, to_node_ind); + } + + ssize_t dx = ssize_t(from_canonical_loc->first) - ssize_t(to_location.first); + ssize_t dy = ssize_t(from_canonical_loc->second) - ssize_t(to_location.second); + + int from_seg_index = g_cost_map.node_to_segment(from_node_ind); + Cost_Entry cost_entry = g_cost_map.find_cost(from_seg_index, dx, dy); + float expected_delay = cost_entry.delay; + float expected_congestion = cost_entry.congestion; + + float expected_cost = criticality_fac * expected_delay + (1.0 - criticality_fac) * expected_congestion; + return expected_cost; +} + +/* runs Dijkstra's algorithm from specified node until all nodes have been + * visited. 
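/* A minimal sketch, separate from the diff above, of the final cost blend in
 * get_connection_box_lookahead_map_cost(): a criticality-weighted mix of the profiled
 * delay and congestion estimates. CostEstimate is a simplified stand-in for Cost_Entry. */
struct CostEstimate { float delay; float congestion; };

float blend_expected_cost(const CostEstimate& e, float criticality) {
    // criticality near 1.0 -> timing-driven (delay dominates);
    // criticality near 0.0 -> congestion-driven.
    return criticality * e.delay + (1.0f - criticality) * e.congestion;
}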
Each time a pin is visited, the delay/congestion information + * to that pin is stored to an entry in the routing_cost_map */ +static void run_dijkstra(int start_node_ind, + t_routing_cost_map* routing_cost_map) { + auto& device_ctx = g_vpr_ctx.device(); + + /* a list of boolean flags (one for each rr node) to figure out if a + * certain node has already been expanded */ + std::vector node_expanded(device_ctx.rr_nodes.size(), false); + /* for each node keep a list of the cost with which that node has been + * visited (used to determine whether to push a candidate node onto the + * expansion queue */ + std::vector node_visited_costs(device_ctx.rr_nodes.size(), -1.0); + /* a priority queue for expansion */ + std::priority_queue pq; + + /* first entry has no upstream delay or congestion */ + PQ_Entry first_entry(start_node_ind, UNDEFINED, 0, 0, 0, true); + + pq.push(first_entry); + + const std::pair* from_canonical_loc = device_ctx.connection_boxes.find_canonical_loc(start_node_ind); + if (from_canonical_loc == nullptr) { + VPR_THROW(VPR_ERROR_ROUTE, "No canonical location of node %d", + start_node_ind); + } + + /* now do routing */ + while (!pq.empty()) { + PQ_Entry current = pq.top(); + pq.pop(); + + int node_ind = current.rr_node_ind; + + /* check that we haven't already expanded from this node */ + if (node_expanded[node_ind]) { + continue; + } + + /* if this node is an ipin record its congestion/delay in the routing_cost_map */ + if (device_ctx.rr_nodes[node_ind].type() == IPIN) { + ConnectionBoxId box_id; + std::pair box_location; + bool found = device_ctx.connection_boxes.find_connection_box( + node_ind, &box_id, &box_location); + if (!found) { + VPR_THROW(VPR_ERROR_ROUTE, "No connection box for IPIN %d", node_ind); + } + + int delta_x = ssize_t(from_canonical_loc->first) - ssize_t(box_location.first); + int delta_y = ssize_t(from_canonical_loc->second) - ssize_t(box_location.second); + + routing_cost_map->push_back(std::make_pair( + std::make_pair(delta_x, delta_y), + Cost_Entry( + current.delay, + current.congestion_upstream))); + } + + expand_dijkstra_neighbours(current, node_visited_costs, node_expanded, pq); + node_expanded[node_ind] = true; + } +} diff --git a/vpr/src/route/connection_box_lookahead_map.h b/vpr/src/route/connection_box_lookahead_map.h new file mode 100644 index 00000000000..75771a1fd6b --- /dev/null +++ b/vpr/src/route/connection_box_lookahead_map.h @@ -0,0 +1,14 @@ +#ifndef CONNECTION_BOX_LOOKAHEAD_H_ +#define CONNECTION_BOX_LOOKAHEAD_H_ + +#include +#include "physical_types.h" + +void compute_connection_box_lookahead( + const std::vector& segment_inf); + +float get_connection_box_lookahead_map_cost(int from_node_ind, + int to_node_ind, + float criticality_fac); + +#endif diff --git a/vpr/src/route/router_lookahead.cpp b/vpr/src/route/router_lookahead.cpp index 645aca4a7ee..96a824a0de2 100644 --- a/vpr/src/route/router_lookahead.cpp +++ b/vpr/src/route/router_lookahead.cpp @@ -1,6 +1,7 @@ #include "router_lookahead.h" #include "router_lookahead_map.h" +#include "connection_box_lookahead_map.h" #include "vpr_error.h" #include "globals.h" #include "route_timing.h" @@ -13,6 +14,8 @@ std::unique_ptr make_router_lookahead(e_router_lookahead router return std::make_unique(); } else if (router_lookahead_type == e_router_lookahead::MAP) { return std::make_unique(); + } else if (router_lookahead_type == e_router_lookahead::CONNECTION_BOX_MAP) { + return std::make_unique(); } else if (router_lookahead_type == e_router_lookahead::NO_OP) { return std::make_unique(); } @@ 
-81,6 +84,25 @@ float MapLookahead::get_expected_cost(int current_node, int target_node, const t } } +float ConnectionBoxMapLookahead::get_expected_cost( + int current_node, + int target_node, + const t_conn_cost_params& params, + float /*R_upstream*/) const { + auto& device_ctx = g_vpr_ctx.device(); + + t_rr_type rr_type = device_ctx.rr_nodes[current_node].type(); + + if (rr_type == CHANX || rr_type == CHANY) { + return get_connection_box_lookahead_map_cost( + current_node, target_node, params.criticality); + } else if (rr_type == IPIN) { /* Change if you're allowing route-throughs */ + return (device_ctx.rr_indexed_data[SINK_COST_INDEX].base_cost); + } else { /* Change this if you want to investigate route-throughs */ + return (0.); + } +} + float NoOpLookahead::get_expected_cost(int /*current_node*/, int /*target_node*/, const t_conn_cost_params& /*params*/, float /*R_upstream*/) const { return 0.; } diff --git a/vpr/src/route/router_lookahead.h b/vpr/src/route/router_lookahead.h index 6880651e887..07138ddb4d5 100644 --- a/vpr/src/route/router_lookahead.h +++ b/vpr/src/route/router_lookahead.h @@ -27,6 +27,11 @@ class MapLookahead : public RouterLookahead { float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; }; +class ConnectionBoxMapLookahead : public RouterLookahead { + protected: + float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; +}; + class NoOpLookahead : public RouterLookahead { protected: float get_expected_cost(int node, int target_node, const t_conn_cost_params& params, float R_upstream) const override; diff --git a/vpr/src/route/router_lookahead_map_utils.cpp b/vpr/src/route/router_lookahead_map_utils.cpp new file mode 100644 index 00000000000..b76edcc7e70 --- /dev/null +++ b/vpr/src/route/router_lookahead_map_utils.cpp @@ -0,0 +1,192 @@ +#include "router_lookahead_map_utils.h" + +#include "globals.h" +#include "vpr_context.h" +#include "vtr_math.h" + +/* Number of CLBs I think the average conn. goes. 
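/* An illustrative sketch, separate from the diff above, of the dispatch in
 * ConnectionBoxMapLookahead::get_expected_cost(): only wire nodes (CHANX/CHANY) consult
 * the profiled map, IPINs pay a fixed base cost toward the SINK, and everything else is
 * treated as free. map_cost and ipin_base_cost are hypothetical stand-ins. */
#include <functional>

enum class RRType { CHANX, CHANY, IPIN, OTHER };

float expected_cost(RRType type,
                    const std::function<float()>& map_cost,
                    float ipin_base_cost) {
    if (type == RRType::CHANX || type == RRType::CHANY) return map_cost();
    if (type == RRType::IPIN) return ipin_base_cost; // one hop into the SINK remains
    return 0.0f; // SOURCE/OPIN/SINK: no further estimate beyond what is already accrued
}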
*/ +static const int CLB_DIST = 3; + +PQ_Entry::PQ_Entry( + int set_rr_node_ind, + int switch_ind, + float parent_delay, + float parent_R_upstream, + float parent_congestion_upstream, + bool starting_node) { + this->rr_node_ind = set_rr_node_ind; + + auto& device_ctx = g_vpr_ctx.device(); + this->delay = parent_delay; + this->congestion_upstream = parent_congestion_upstream; + this->R_upstream = parent_R_upstream; + if (!starting_node) { + int cost_index = device_ctx.rr_nodes[set_rr_node_ind].cost_index(); + + float Tsw = device_ctx.rr_switch_inf[switch_ind].Tdel; + float Rsw = device_ctx.rr_switch_inf[switch_ind].R; + float Cnode = device_ctx.rr_nodes[set_rr_node_ind].C(); + float Rnode = device_ctx.rr_nodes[set_rr_node_ind].R(); + + float T_linear = 0.f; + float T_quadratic = 0.f; + if (device_ctx.rr_switch_inf[switch_ind].buffered()) { + T_linear = Tsw + Rsw * Cnode + 0.5 * Rnode * Cnode; + T_quadratic = 0.; + } else { /* Pass transistor */ + T_linear = Tsw + 0.5 * Rsw * Cnode; + T_quadratic = (Rsw + Rnode) * 0.5 * Cnode; + } + + float base_cost; + if (device_ctx.rr_indexed_data[cost_index].inv_length < 0) { + base_cost = device_ctx.rr_indexed_data[cost_index].base_cost; + } else { + float frac_num_seg = CLB_DIST * device_ctx.rr_indexed_data[cost_index].inv_length; + + base_cost = frac_num_seg * T_linear + + frac_num_seg * frac_num_seg * T_quadratic; + } + + VTR_ASSERT(T_linear >= 0.); + VTR_ASSERT(base_cost >= 0.); + this->delay += T_linear; + + this->congestion_upstream += base_cost; + } + + /* set the cost of this node */ + this->cost = this->delay; +} + +/* returns cost entry with the smallest delay */ +Cost_Entry Expansion_Cost_Entry::get_smallest_entry() const { + Cost_Entry smallest_entry; + + for (auto entry : this->cost_vector) { + if (!smallest_entry.valid() || entry.delay < smallest_entry.delay) { + smallest_entry = entry; + } + } + + return smallest_entry; +} + +/* returns a cost entry that represents the average of all the recorded entries */ +Cost_Entry Expansion_Cost_Entry::get_average_entry() const { + float avg_delay = 0; + float avg_congestion = 0; + + for (auto cost_entry : this->cost_vector) { + avg_delay += cost_entry.delay; + avg_congestion += cost_entry.congestion; + } + + avg_delay /= (float)this->cost_vector.size(); + avg_congestion /= (float)this->cost_vector.size(); + + return Cost_Entry(avg_delay, avg_congestion); +} + +/* returns a cost entry that represents the geomean of all the recorded entries */ +Cost_Entry Expansion_Cost_Entry::get_geomean_entry() const { + float geomean_delay = 0; + float geomean_cong = 0; + for (auto cost_entry : this->cost_vector) { + geomean_delay += log(cost_entry.delay); + geomean_cong += log(cost_entry.congestion); + } + + geomean_delay = exp(geomean_delay / (float)this->cost_vector.size()); + geomean_cong = exp(geomean_cong / (float)this->cost_vector.size()); + + return Cost_Entry(geomean_delay, geomean_cong); +} + +/* returns a cost entry that represents the medial of all recorded entries */ +Cost_Entry Expansion_Cost_Entry::get_median_entry() const { + /* find median by binning the delays of all entries and then chosing the bin + * with the largest number of entries */ + + int num_bins = 10; + + /* find entries with smallest and largest delays */ + Cost_Entry min_del_entry; + Cost_Entry max_del_entry; + for (auto entry : this->cost_vector) { + if (!min_del_entry.valid() || entry.delay < min_del_entry.delay) { + min_del_entry = entry; + } + if (!max_del_entry.valid() || entry.delay > max_del_entry.delay) { + max_del_entry = 
entry; + } + } + + /* get the bin size */ + float delay_diff = max_del_entry.delay - min_del_entry.delay; + float bin_size = delay_diff / (float)num_bins; + + /* sort the cost entries into bins */ + std::vector > entry_bins(num_bins, std::vector()); + for (auto entry : this->cost_vector) { + float bin_num = floor((entry.delay - min_del_entry.delay) / bin_size); + + VTR_ASSERT(vtr::nint(bin_num) >= 0 && vtr::nint(bin_num) <= num_bins); + if (vtr::nint(bin_num) == num_bins) { + /* largest entry will otherwise have an out-of-bounds bin number */ + bin_num -= 1; + } + entry_bins[vtr::nint(bin_num)].push_back(entry); + } + + /* find the bin with the largest number of elements */ + int largest_bin = 0; + int largest_size = 0; + for (int ibin = 0; ibin < num_bins; ibin++) { + if (entry_bins[ibin].size() > (unsigned)largest_size) { + largest_bin = ibin; + largest_size = (unsigned)entry_bins[ibin].size(); + } + } + + /* get the representative delay of the largest bin */ + Cost_Entry representative_entry = entry_bins[largest_bin][0]; + + return representative_entry; +} + +/* iterates over the children of the specified node and selectively pushes them onto the priority queue */ +void expand_dijkstra_neighbours(PQ_Entry parent_entry, + std::vector& node_visited_costs, + std::vector& node_expanded, + std::priority_queue& pq) { + auto& device_ctx = g_vpr_ctx.device(); + + int parent_ind = parent_entry.rr_node_ind; + + auto& parent_node = device_ctx.rr_nodes[parent_ind]; + + for (int iedge = 0; iedge < parent_node.num_edges(); iedge++) { + int child_node_ind = parent_node.edge_sink_node(iedge); + int switch_ind = parent_node.edge_switch(iedge); + + /* skip this child if it has already been expanded from */ + if (node_expanded[child_node_ind]) { + continue; + } + + PQ_Entry child_entry(child_node_ind, switch_ind, parent_entry.delay, + parent_entry.R_upstream, parent_entry.congestion_upstream, false); + + VTR_ASSERT(child_entry.cost >= 0); + + /* skip this child if it has been visited with smaller cost */ + if (node_visited_costs[child_node_ind] >= 0 && node_visited_costs[child_node_ind] < child_entry.cost) { + continue; + } + + /* finally, record the cost with which the child was visited and put the child entry on the queue */ + node_visited_costs[child_node_ind] = child_entry.cost; + pq.push(child_entry); + } +} diff --git a/vpr/src/route/router_lookahead_map_utils.h b/vpr/src/route/router_lookahead_map_utils.h new file mode 100644 index 00000000000..d0077ccb9bc --- /dev/null +++ b/vpr/src/route/router_lookahead_map_utils.h @@ -0,0 +1,142 @@ +#ifndef ROUTER_LOOKAHEAD_MAP_UTILS_H_ +#define ROUTER_LOOKAHEAD_MAP_UTILS_H_ +/* + * The router lookahead provides an estimate of the cost from an intermediate node to the target node + * during directed (A*-like) routing. + * + * The VPR 7.0 lookahead (route/route_timing.c ==> get_timing_driven_expected_cost) lower-bounds the remaining delay and + * congestion by assuming that a minimum number of wires, of the same type as the current node being expanded, can be used + * to complete the route. While this method is efficient, it can run into trouble with architectures that use + * multiple interconnected wire types. + * + * The lookahead in this file pre-computes delay/congestion costs up and to the right of a starting tile. This generates + * delay/congestion tables for {CHANX, CHANY} channel types, over all wire types defined in the architecture file. + * See Section 3.2.4 in Oleg Petelin's MASc thesis (2016) for more discussion. 
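/* An illustrative sketch, separate from the diff above, of the pruning rule in
 * expand_dijkstra_neighbours(): a child is pushed only if it has never been visited, or
 * if the new path reaches it with a lower cost. The queue entry and edge list are
 * simplified stand-ins for PQ_Entry and the rr node fanout. */
#include <queue>
#include <utility>
#include <vector>

struct QEntry {
    int node;
    float cost;
    bool operator<(const QEntry& o) const { return cost > o.cost; } // min-heap via max-pq
};

void expand_neighbours(int parent, float parent_cost,
                       const std::vector<std::vector<std::pair<int, float>>>& edges,
                       std::vector<float>& best_cost, // -1 means "never visited"
                       const std::vector<bool>& expanded,
                       std::priority_queue<QEntry>& pq) {
    for (const auto& [child, edge_cost] : edges[parent]) {
        if (expanded[child]) continue; // already settled
        float cost = parent_cost + edge_cost;
        if (best_cost[child] >= 0 && best_cost[child] < cost) continue; // worse path
        best_cost[child] = cost;
        pq.push({child, cost});
    }
}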
+ * + */ + +#include +#include +#include +#include +#include "vpr_types.h" + +/* when a list of delay/congestion entries at a coordinate in Cost_Entry is boiled down to a single + * representative entry, this enum is passed-in to specify how that representative entry should be + * calculated */ +enum e_representative_entry_method { + FIRST = 0, //the first cost that was recorded + SMALLEST, //the smallest-delay cost recorded + AVERAGE, + GEOMEAN, + MEDIAN +}; + +/* f_cost_map is an array of these cost entries that specifies delay/congestion estimates + * to travel relative x/y distances */ +class Cost_Entry { + public: + float delay; + float congestion; + + Cost_Entry() { + delay = std::numeric_limits::infinity(); + congestion = std::numeric_limits::infinity(); + } + Cost_Entry(float set_delay, float set_congestion) { + delay = set_delay; + congestion = set_congestion; + } + + bool valid() const { + return std::isfinite(delay) && std::isfinite(congestion); + } +}; + +/* a class that stores delay/congestion information for a given relative coordinate during the Dijkstra expansion. + * since it stores multiple cost entries, it is later boiled down to a single representative cost entry to be stored + * in the final lookahead cost map */ +class Expansion_Cost_Entry { + private: + std::vector cost_vector; + + Cost_Entry get_smallest_entry() const; + Cost_Entry get_average_entry() const; + Cost_Entry get_geomean_entry() const; + Cost_Entry get_median_entry() const; + + public: + void add_cost_entry(e_representative_entry_method method, + float add_delay, + float add_congestion) { + Cost_Entry cost_entry(add_delay, add_congestion); + if (method == SMALLEST) { + /* taking the smallest-delay entry anyway, so no need to push back multple entries */ + if (this->cost_vector.empty()) { + this->cost_vector.push_back(cost_entry); + } else { + if (add_delay < this->cost_vector[0].delay) { + this->cost_vector[0] = cost_entry; + } + } + } else { + this->cost_vector.push_back(cost_entry); + } + } + void clear_cost_entries() { + this->cost_vector.clear(); + } + + Cost_Entry get_representative_cost_entry(e_representative_entry_method method) const { + Cost_Entry entry; + + if (!cost_vector.empty()) { + switch (method) { + case FIRST: + entry = cost_vector[0]; + break; + case SMALLEST: + entry = this->get_smallest_entry(); + break; + case AVERAGE: + entry = this->get_average_entry(); + break; + case GEOMEAN: + entry = this->get_geomean_entry(); + break; + case MEDIAN: + entry = this->get_median_entry(); + break; + default: + break; + } + } + return entry; + } +}; + +/* a class that represents an entry in the Dijkstra expansion priority queue */ +class PQ_Entry { + public: + int rr_node_ind; //index in device_ctx.rr_nodes that this entry represents + float cost; //the cost of the path to get to this node + + /* store backward delay, R and congestion info */ + float delay; + float R_upstream; + float congestion_upstream; + + PQ_Entry(int set_rr_node_ind, int /*switch_ind*/, float parent_delay, float parent_R_upstream, float parent_congestion_upstream, bool starting_node); + + bool operator<(const PQ_Entry& obj) const { + /* inserted into max priority queue so want queue entries with a lower cost to be greater */ + return (this->cost > obj.cost); + } +}; + +void expand_dijkstra_neighbours(PQ_Entry parent_entry, + std::vector& node_visited_costs, + std::vector& node_expanded, + std::priority_queue& pq); + +#endif diff --git a/vpr/src/route/rr_graph.cpp b/vpr/src/route/rr_graph.cpp index d00df14ef95..809301682cc 
100644 --- a/vpr/src/route/rr_graph.cpp +++ b/vpr/src/route/rr_graph.cpp @@ -34,6 +34,7 @@ using namespace std; #include "rr_graph_writer.h" #include "rr_graph_reader.h" #include "router_lookahead_map.h" +#include "connection_box_lookahead_map.h" #include "rr_graph_clock.h" #include "rr_types.h" @@ -384,6 +385,10 @@ void create_rr_graph(const t_graph_type graph_type, compute_router_lookahead(segment_inf.size()); } + if (router_lookahead_type == e_router_lookahead::CONNECTION_BOX_MAP) { + compute_connection_box_lookahead(segment_inf); + } + //Write out rr graph file if needed if (!det_routing_arch->write_rr_graph_filename.empty()) { write_rr_graph(det_routing_arch->write_rr_graph_filename.c_str(), segment_inf); @@ -893,6 +898,7 @@ void load_rr_switch_from_arch_switch(int arch_switch_idx, device_ctx.rr_switch_inf[rr_switch_idx].set_type(device_ctx.arch_switch_inf[arch_switch_idx].type()); device_ctx.rr_switch_inf[rr_switch_idx].R = device_ctx.arch_switch_inf[arch_switch_idx].R; device_ctx.rr_switch_inf[rr_switch_idx].Cin = device_ctx.arch_switch_inf[arch_switch_idx].Cin; + device_ctx.rr_switch_inf[rr_switch_idx].Cinternal = device_ctx.arch_switch_inf[arch_switch_idx].Cinternal; //now we can retrieve Cinternal from the arch and implement into the rr calculations. device_ctx.rr_switch_inf[rr_switch_idx].Cout = device_ctx.arch_switch_inf[arch_switch_idx].Cout; device_ctx.rr_switch_inf[rr_switch_idx].Tdel = rr_switch_Tdel; device_ctx.rr_switch_inf[rr_switch_idx].mux_trans_size = device_ctx.arch_switch_inf[arch_switch_idx].mux_trans_size; diff --git a/vpr/src/route/rr_graph_reader.cpp b/vpr/src/route/rr_graph_reader.cpp index 465ea6d92f9..6d5756c91fa 100644 --- a/vpr/src/route/rr_graph_reader.cpp +++ b/vpr/src/route/rr_graph_reader.cpp @@ -56,6 +56,7 @@ void verify_blocks(pugi::xml_node parent, const pugiutil::loc_data& loc_data); void process_blocks(pugi::xml_node parent, const pugiutil::loc_data& loc_data); void verify_grid(pugi::xml_node parent, const pugiutil::loc_data& loc_data, const DeviceGrid& grid); void process_nodes(pugi::xml_node parent, const pugiutil::loc_data& loc_data); +void process_connection_boxes(pugi::xml_node parent, const pugiutil::loc_data& loc_data); void process_edges(pugi::xml_node parent, const pugiutil::loc_data& loc_data, int* wire_to_rr_ipin_switch, const int num_rr_switches); void process_channels(t_chan_width& chan_width, pugi::xml_node parent, const pugiutil::loc_data& loc_data); void process_rr_node_indices(const DeviceGrid& grid); @@ -133,6 +134,13 @@ void load_rr_file(const t_graph_type graph_type, next_component = get_first_child(rr_graph, "channels", loc_data); process_channels(nodes_per_chan, next_component, loc_data); + next_component = get_first_child(rr_graph, "connection_boxes", loc_data, OPTIONAL); + if (next_component != nullptr) { + process_connection_boxes(next_component, loc_data); + } else { + device_ctx.connection_boxes.clear(); + } + /* Decode the graph_type */ bool is_global_graph = (GRAPH_GLOBAL == graph_type ? 
true : false); @@ -146,6 +154,7 @@ void load_rr_file(const t_graph_type graph_type, int num_rr_nodes = count_children(next_component, "node", loc_data); device_ctx.rr_nodes.resize(num_rr_nodes); + device_ctx.connection_boxes.resize_nodes(num_rr_nodes); process_nodes(next_component, loc_data); /* Loads edges, switches, and node look up tables*/ @@ -179,6 +188,7 @@ void load_rr_file(const t_graph_type graph_type, device_ctx.chan_width = nodes_per_chan; check_rr_graph(graph_type, grid, device_ctx.block_types); + device_ctx.connection_boxes.create_sink_back_ref(); } catch (XmlError& e) { vpr_throw(VPR_ERROR_ROUTE, read_rr_graph_name, e.line(), "%s", e.what()); @@ -234,11 +244,13 @@ void process_switches(pugi::xml_node parent, const pugiutil::loc_data& loc_data) rr_switch.R = get_attribute(SwitchSubnode, "R", loc_data).as_float(); rr_switch.Cin = get_attribute(SwitchSubnode, "Cin", loc_data).as_float(); rr_switch.Cout = get_attribute(SwitchSubnode, "Cout", loc_data).as_float(); + rr_switch.Cinternal = get_attribute(SwitchSubnode, "Cinternal", loc_data).as_float(); rr_switch.Tdel = get_attribute(SwitchSubnode, "Tdel", loc_data).as_float(); } else { rr_switch.R = 0; rr_switch.Cin = 0; rr_switch.Cout = 0; + rr_switch.Cinternal = 0; rr_switch.Tdel = 0; } SwitchSubnode = get_single_child(Switch, "sizing", loc_data); @@ -304,6 +316,18 @@ void process_nodes(pugi::xml_node parent, const pugiutil::loc_data& loc_data) { node.set_type(OPIN); } else if (strcmp(node_type, "IPIN") == 0) { node.set_type(IPIN); + + pugi::xml_node connection_boxSubnode = get_single_child(rr_node, "connection_box", loc_data, OPTIONAL); + if (connection_boxSubnode) { + int x = get_attribute(connection_boxSubnode, "x", loc_data).as_int(); + int y = get_attribute(connection_boxSubnode, "y", loc_data).as_int(); + int id = get_attribute(connection_boxSubnode, "id", loc_data).as_int(); + + device_ctx.connection_boxes.add_connection_box(inode, + ConnectionBoxId(id), + std::make_pair(x, y)); + } + } else { vpr_throw(VPR_ERROR_OTHER, __FILE__, __LINE__, "Valid inputs for class types are \"CHANX\", \"CHANY\",\"SOURCE\", \"SINK\",\"OPIN\", and \"IPIN\"."); @@ -323,6 +347,15 @@ void process_nodes(pugi::xml_node parent, const pugiutil::loc_data& loc_data) { } } + pugi::xml_node connection_boxSubnode = get_single_child(rr_node, "canonical_loc", loc_data, OPTIONAL); + if (connection_boxSubnode) { + int x = get_attribute(connection_boxSubnode, "x", loc_data).as_int(); + int y = get_attribute(connection_boxSubnode, "y", loc_data).as_int(); + + device_ctx.connection_boxes.add_canonical_loc(inode, + std::make_pair(x, y)); + } + node.set_capacity(get_attribute(rr_node, "capacity", loc_data).as_float()); //-------------- @@ -876,3 +909,26 @@ void set_cost_indices(pugi::xml_node parent, const pugiutil::loc_data& loc_data, rr_node = rr_node.next_sibling(rr_node.name()); } } + +void process_connection_boxes(pugi::xml_node parent, const pugiutil::loc_data& loc_data) { + auto& device_ctx = g_vpr_ctx.mutable_device(); + + int x_dim = get_attribute(parent, "x_dim", loc_data).as_int(0); + int y_dim = get_attribute(parent, "y_dim", loc_data).as_int(0); + int num_boxes = get_attribute(parent, "num_boxes", loc_data).as_int(0); + VTR_ASSERT(num_boxes >= 0); + + pugi::xml_node connection_box = get_first_child(parent, "connection_box", loc_data); + std::vector boxes(num_boxes); + while (connection_box) { + int id = get_attribute(connection_box, "id", loc_data).as_int(-1); + const char* name = get_attribute(connection_box, "name", loc_data).as_string(nullptr); + 
VTR_ASSERT(id >= 0 && id < num_boxes); + VTR_ASSERT(boxes.at(id).name == ""); + boxes.at(id).name = std::string(name); + + connection_box = connection_box.next_sibling(connection_box.name()); + } + + device_ctx.connection_boxes.reset_boxes(std::make_pair(x_dim, y_dim), boxes); +} diff --git a/vpr/src/route/rr_graph_writer.cpp b/vpr/src/route/rr_graph_writer.cpp index fccc0341136..48012725a65 100644 --- a/vpr/src/route/rr_graph_writer.cpp +++ b/vpr/src/route/rr_graph_writer.cpp @@ -189,7 +189,8 @@ void write_rr_switches(fstream& fp) { } fp << ">" << endl; - fp << "\t\t\t" << endl; + fp << "\t\t\t" << endl; fp << "\t\t\t" << endl; fp << "\t\t" << endl; } diff --git a/vpr/src/route/rr_node.h b/vpr/src/route/rr_node.h index 16075b8b176..5d610824684 100644 --- a/vpr/src/route/rr_node.h +++ b/vpr/src/route/rr_node.h @@ -173,7 +173,7 @@ class t_rr_node { uint16_t edges_capacity_ = 0; uint8_t num_non_configurable_edges_ = 0; - int8_t cost_index_ = -1; + uint16_t cost_index_ = -1; int16_t rc_index_ = -1; int16_t xlow_ = -1; diff --git a/vpr/src/timing/clb_delay_calc.inl b/vpr/src/timing/clb_delay_calc.inl index b19effad62b..47d76bf5d36 100644 --- a/vpr/src/timing/clb_delay_calc.inl +++ b/vpr/src/timing/clb_delay_calc.inl @@ -73,7 +73,8 @@ inline float ClbDelayCalc::pb_route_delay(ClusterBlockId clb, int pb_route_idx, inline const t_pb_graph_edge* ClbDelayCalc::find_pb_graph_edge(ClusterBlockId clb, int pb_route_idx) const { auto& cluster_ctx = g_vpr_ctx.clustering(); - int type_index = cluster_ctx.clb_nlist.block_type(clb)->index; + //Getting the original block type in case the CLB has been placed in an equivalent tile. + int type_index = cluster_ctx.clb_nlist.block_type(clb, false)->index; const t_pb* pb = cluster_ctx.clb_nlist.block_pb(clb); if (pb->pb_route.count(pb_route_idx)) { @@ -84,7 +85,7 @@ inline const t_pb_graph_edge* ClbDelayCalc::find_pb_graph_edge(ClusterBlockId cl const t_pb_graph_pin* pb_gpin = intra_lb_pb_pin_lookup_.pb_gpin(type_index, pb_route_idx); const t_pb_graph_pin* upstream_pb_gpin = intra_lb_pb_pin_lookup_.pb_gpin(type_index, upstream_pb_route_idx); - return find_pb_graph_edge(upstream_pb_gpin, pb_gpin); + return find_pb_graph_edge(upstream_pb_gpin, pb_gpin); } } diff --git a/vpr/src/timing/timing_graph_builder.cpp b/vpr/src/timing/timing_graph_builder.cpp index 2ae02a020db..192b9ed8aaa 100644 --- a/vpr/src/timing/timing_graph_builder.cpp +++ b/vpr/src/timing/timing_graph_builder.cpp @@ -40,8 +40,8 @@ TimingGraphBuilder::TimingGraphBuilder(const AtomNetlist& netlist, //pass } -std::unique_ptr TimingGraphBuilder::timing_graph() { - build(); +std::unique_ptr TimingGraphBuilder::timing_graph(bool allow_dangling_combinational_nodes) { + build(allow_dangling_combinational_nodes); opt_memory_layout(); VTR_ASSERT(tg_); @@ -50,9 +50,13 @@ std::unique_ptr TimingGraphBuilder::timing_graph() { return std::move(tg_); } -void TimingGraphBuilder::build() { +void TimingGraphBuilder::build(bool allow_dangling_combinational_nodes) { tg_ = std::make_unique(); + // Optionally allow dangling combinational nodes. + // Set by `--allow_dangling_combinational_nodes on`. 
Default value is false + tg_->set_allow_dangling_combinational_nodes(allow_dangling_combinational_nodes); + for (AtomBlockId blk : netlist_.blocks()) { AtomBlockType blk_type = netlist_.block_type(blk); diff --git a/vpr/src/timing/timing_graph_builder.h b/vpr/src/timing/timing_graph_builder.h index 0ca93d1e19f..8e6745b7cb1 100644 --- a/vpr/src/timing/timing_graph_builder.h +++ b/vpr/src/timing/timing_graph_builder.h @@ -10,10 +10,10 @@ class TimingGraphBuilder { TimingGraphBuilder(const AtomNetlist& netlist, AtomLookup& netlist_lookup); - std::unique_ptr timing_graph(); + std::unique_ptr timing_graph(bool allow_dangling_combinational_nodes); private: - void build(); + void build(bool allow_dangling_combinational_nodes); void opt_memory_layout(); void add_io_to_timing_graph(const AtomBlockId blk); diff --git a/vpr/src/timing/timing_util.cpp b/vpr/src/timing/timing_util.cpp index e260b8f5cfa..8bc919dac3f 100644 --- a/vpr/src/timing/timing_util.cpp +++ b/vpr/src/timing/timing_util.cpp @@ -571,6 +571,10 @@ float calc_relaxed_criticality(const std::map& domains_max_re max_req += shift; } + if (!std::isfinite(slack)) { + continue; + } + float crit = std::numeric_limits::quiet_NaN(); if (max_req > 0.) { //Standard case diff --git a/vpr/src/util/vpr_error.cpp b/vpr/src/util/vpr_error.cpp index 96e0f4bba3f..2bad3b6f919 100644 --- a/vpr/src/util/vpr_error.cpp +++ b/vpr/src/util/vpr_error.cpp @@ -1,6 +1,8 @@ #include +#include #include "vtr_util.h" +#include "vtr_log.h" #include "vpr_error.h" /* Date:June 15th, 2013 @@ -11,6 +13,10 @@ * anything but throw an exception which will be caught * main.c. */ +void map_error_activation_status(std::string function_name) { + functions_to_demote.insert(function_name); +} + void vpr_throw(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, @@ -41,3 +47,38 @@ void vvpr_throw(enum e_vpr_error type, throw VprError(type, msg, psz_file_name, line_num); } + +void vpr_throw_msg(enum e_vpr_error type, + const char* psz_file_name, + unsigned int line_num, + std::string msg) { + throw VprError(type, msg, psz_file_name, line_num); +} + +void vpr_throw_opt(enum e_vpr_error type, + const char* psz_func_name, + const char* psz_file_name, + unsigned int line_num, + const char* psz_message, + ...) 
{ + std::string func_name(psz_func_name); + + // Make a variable argument list + va_list va_args; + + // Initialize variable argument list + va_start(va_args, psz_message); + + //Format the message + std::string msg = vtr::vstring_fmt(psz_message, va_args); + + auto result = functions_to_demote.find(func_name); + if (result != functions_to_demote.end()) { + VTR_LOGFF_WARN(psz_file_name, line_num, psz_func_name, msg.data()); + } else { + vpr_throw_msg(type, psz_file_name, line_num, msg); + } + + // Reset variable argument list + va_end(va_args); +} diff --git a/vpr/src/util/vpr_error.h b/vpr/src/util/vpr_error.h index f999889359b..32619251c49 100644 --- a/vpr/src/util/vpr_error.h +++ b/vpr/src/util/vpr_error.h @@ -1,8 +1,11 @@ #ifndef VPR_ERROR_H #define VPR_ERROR_H -#include "vtr_error.h" #include +#include +#include + +#include "vtr_error.h" enum e_vpr_error { VPR_ERROR_UNKNOWN = 0, @@ -45,6 +48,15 @@ class VprError : public vtr::VtrError { t_vpr_error_type type_; }; +// Set of function names for which the VPR_THROW errors are treated +// as VTR_LOG_WARN +static std::unordered_set functions_to_demote; + +// This function is used to save into the functions_to_demote set +// all the function names which contain VPR_THROW errors that are +// going to be demoted to be VTR_LOG_WARN +void map_error_activation_status(std::string function_name); + //VPR error reporting routines // //Note that we mark these functions with the C++11 attribute 'noreturn' @@ -52,14 +64,17 @@ class VprError : public vtr::VtrError { //reduce false-positive compiler warnings [[noreturn]] void vpr_throw(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, const char* psz_message, ...); [[noreturn]] void vvpr_throw(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, const char* psz_message, va_list args); +[[noreturn]] void vpr_throw_msg(enum e_vpr_error type, const char* psz_file_name, unsigned int line_num, std::string msg); + +void vpr_throw_opt(enum e_vpr_error type, const char* psz_func_name, const char* psz_file_name, unsigned int line_num, const char* psz_message, ...); /* * Macro wrapper around vpr_throw() which automatically * specifies file and line number of call site. */ -#define VPR_THROW(type, ...) \ - do { \ - vpr_throw(type, __FILE__, __LINE__, __VA_ARGS__); \ +#define VPR_THROW(type, ...) \ + do { \ + vpr_throw_opt(type, __func__, __FILE__, __LINE__, __VA_ARGS__); \ } while (false) #endif diff --git a/vpr/src/util/vpr_utils.cpp b/vpr/src/util/vpr_utils.cpp index 34b15f205b4..21ea0b3ca19 100644 --- a/vpr/src/util/vpr_utils.cpp +++ b/vpr/src/util/vpr_utils.cpp @@ -1,6 +1,8 @@ #include #include #include +#include + using namespace std; #include "vtr_assert.h" @@ -18,7 +20,6 @@ using namespace std; #include "string.h" #include "pack_types.h" #include "device_grid.h" -#include /* This module contains subroutines that are used in several unrelated parts * * of VPR. They are VPR-specific utility routines. 
*/ @@ -89,6 +90,10 @@ static AtomPinId find_atom_pin_for_pb_route_id(ClusterBlockId clb, int pb_route_ static bool block_type_contains_blif_model(t_type_ptr type, const std::regex& blif_model_regex); static bool pb_type_contains_blif_model(const t_pb_type* pb_type, const std::regex& blif_model_regex); +static t_type_ptr get_equivalent_tile(t_type_ptr type, int eq_itype); +static bool try_sync_equivalent_tiles(ClusterBlockId clb, t_type_ptr logic_type, t_type_ptr phyical_type); +static int get_type_pin(std::unordered_map> pin_mappings, int eq_type_index, int eq_pin); + /******************** Subroutine definitions *********************************/ const t_model* find_model(const t_model* models, const std::string& name, bool required) { @@ -133,6 +138,46 @@ void print_tabs(FILE* fpout, int num_tab) { } } +static t_type_ptr get_equivalent_tile(t_type_ptr type, int eq_itype) { + auto result = type->equivalent_tiles.find(eq_itype); + VTR_ASSERT(result != type->equivalent_tiles.end()); + + return result->second; +} + +static int get_type_pin(std::unordered_map> pin_mappings, int eq_type_index, int eq_pin) { + auto tile_result = pin_mappings.find(eq_type_index); + VTR_ASSERT(tile_result != pin_mappings.end()); + + auto pin_mapping = tile_result->second; + auto pin_result = pin_mapping.find(eq_pin); + VTR_ASSERT(pin_result != pin_mapping.end()); + + return pin_result->second; +} + +static bool try_sync_equivalent_tiles(ClusterBlockId clb, t_type_ptr logic_type, t_type_ptr physical_type) { + auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); + auto& clb_nlist = cluster_ctx.clb_nlist; + + //Searching for equivalent tiles in the logic_type + for (int itype = 0; itype < logic_type->num_equivalent_tiles; itype++) { + if (get_equivalent_tile(logic_type, itype)->index == physical_type->index) { + clb_nlist.set_equivalent_block_type(clb, itype, physical_type); + + //Setting new logical to physical pin mapping + for (auto pin : clb_nlist.block_pins(clb)) { + int original_ipin = clb_nlist.pin_physical_index(pin); + int new_ipin = get_type_pin(logic_type->equivalent_tile_pin_mapping, itype, original_ipin); + clb_nlist.set_pin_physical_index(pin, new_ipin); + } + return true; + } + } + + return false; +} + /* Points the place_ctx.grid_blocks structure back to the blocks list */ void sync_grid_to_blocks() { auto& place_ctx = g_vpr_ctx.mutable_placement(); @@ -170,11 +215,16 @@ void sync_grid_to_blocks() { } /* Check types match */ - if (cluster_ctx.clb_nlist.block_type(blk_id) != device_ctx.grid[blk_x][blk_y].type) { - VPR_THROW(VPR_ERROR_PLACE, "A block is in a grid location (%d x %d) with a conflicting types '%s' and '%s' .\n", - blk_x, blk_y, - cluster_ctx.clb_nlist.block_type(blk_id)->name, - device_ctx.grid[blk_x][blk_y].type->name); + auto logic_type = cluster_ctx.clb_nlist.block_type(blk_id); + auto physical_type = device_ctx.grid[blk_x][blk_y].type; + + if (logic_type != physical_type) { + if (!try_sync_equivalent_tiles(blk_id, logic_type, physical_type)) { + VPR_THROW(VPR_ERROR_PLACE, "A block is in a grid location (%d x %d) with a conflicting types '%s' and '%s' .\n", + blk_x, blk_y, + cluster_ctx.clb_nlist.block_type(blk_id)->name, + device_ctx.grid[blk_x][blk_y].type->name); + } } /* Check already in use */ @@ -443,7 +493,9 @@ static AtomPinId find_atom_pin_for_pb_route_id(ClusterBlockId clb, int pb_route_ VTR_ASSERT_MSG(cluster_ctx.clb_nlist.block_pb(clb)->pb_route[pb_route_id].atom_net_id, "PB route should correspond to a valid atom net"); //Find the graph pin associated with this pb_route - 
const t_pb_graph_pin* gpin = pb_gpin_lookup.pb_gpin(cluster_ctx.clb_nlist.block_type(clb)->index, pb_route_id); + int index = cluster_ctx.clb_nlist.block_type(clb, false)->index; + + const t_pb_graph_pin* gpin = pb_gpin_lookup.pb_gpin(index, pb_route_id); VTR_ASSERT(gpin); //Get the PB associated with this block @@ -541,25 +593,36 @@ int find_clb_pb_pin(ClusterBlockId clb, int clb_pin) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.placement(); - VTR_ASSERT_MSG(clb_pin < cluster_ctx.clb_nlist.block_type(clb)->num_pins, "Must be a valid top-level pin"); + auto& clb_nlist = cluster_ctx.clb_nlist; + + auto type = clb_nlist.block_type(clb); + + int pin = clb_pin; + + // In case an equivalent tile is selected, the CLB block type will be different (e.g. CLB logic type is LAB, CLB physical type is MLAB). + // Therefore, I need to retrieve the pin mapping from the LAB type by setting the `false` flag when calling block_type. + if (clb_nlist.block_eq_type_effective(clb)) { + int eq_type_index = clb_nlist.block_eq_type_index(clb); + auto block_type = clb_nlist.block_type(clb, false); + + pin = get_type_pin(block_type->equivalent_tile_inverse_pin_mapping, eq_type_index, clb_pin); + } - int pb_pin = -1; + int pb_pin = OPEN; if (place_ctx.block_locs[clb].nets_and_pins_synced_to_z_coordinate) { //Pins have been offset by z-coordinate, need to remove offset - t_type_ptr type = cluster_ctx.clb_nlist.block_type(clb); VTR_ASSERT(type->num_pins % type->capacity == 0); int num_basic_block_pins = type->num_pins / type->capacity; /* Logical location and physical location is offset by z * max_num_block_pins */ - pb_pin = clb_pin - place_ctx.block_locs[clb].loc.z * num_basic_block_pins; + pb_pin = pin - place_ctx.block_locs[clb].loc.z * num_basic_block_pins; } else { //No offset - pb_pin = clb_pin; + pb_pin = pin; } VTR_ASSERT(pb_pin >= 0); - return pb_pin; } @@ -568,21 +631,35 @@ int find_pb_pin_clb_pin(ClusterBlockId clb, int pb_pin) { auto& cluster_ctx = g_vpr_ctx.clustering(); auto& place_ctx = g_vpr_ctx.placement(); - int clb_pin = -1; + auto& clb_nlist = cluster_ctx.clb_nlist; + + auto type = clb_nlist.block_type(clb); + + int pin = pb_pin; + + // In case an equivalent tile is selected, the CLB block type will be different (e.g. CLB logic type is LAB, CLB physical type is MLAB). + // Therefore, I need to retrieve the pin mapping from the LAB type by setting the `false` flag when calling block_type. 
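As an aside on the remapping described in that comment: get_type_pin(), defined earlier in vpr_utils.cpp, is a two-level lookup, from an equivalent-tile index to a logical-to-physical pin map. The sketch below is a minimal standalone illustration of that lookup; the nested std::unordered_map layout is inferred from the declaration (its template arguments were lost in this copy of the patch), and all pin numbers are made up rather than taken from VPR's equivalent_tile_pin_mapping data.

```cpp
#include <cassert>
#include <iostream>
#include <unordered_map>

// Assumed layout of the nested map passed to get_type_pin():
// equivalent-tile index -> (logical pin index -> physical pin index)
using PinMapping = std::unordered_map<int, std::unordered_map<int, int>>;

static int lookup_type_pin(const PinMapping& pin_mappings, int eq_type_index, int eq_pin) {
    auto tile_it = pin_mappings.find(eq_type_index);
    assert(tile_it != pin_mappings.end() && "no mapping for this equivalent tile");

    auto pin_it = tile_it->second.find(eq_pin);
    assert(pin_it != tile_it->second.end() && "logical pin has no physical counterpart");

    return pin_it->second;
}

int main() {
    // Hypothetical data: equivalent tile 0 shifts every logical pin up by 4.
    PinMapping mapping = {{0, {{0, 4}, {1, 5}, {2, 6}}}};

    std::cout << "logical pin 1 -> physical pin "
              << lookup_type_pin(mapping, /*eq_type_index=*/0, /*eq_pin=*/1) << "\n"; // prints 5
    return 0;
}
```

The hunk below then applies the same kind of lookup through block_type(clb, false) and the equivalent-tile pin-mapping tables.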
+ if (clb_nlist.block_eq_type_effective(clb)) { + int eq_type_index = clb_nlist.block_eq_type_index(clb); + auto block_type = clb_nlist.block_type(clb, false); + + pin = get_type_pin(block_type->equivalent_tile_pin_mapping, eq_type_index, pb_pin); + } + + int clb_pin = OPEN; if (place_ctx.block_locs[clb].nets_and_pins_synced_to_z_coordinate) { //Pins have been offset by z-coordinate, need to remove offset - t_type_ptr type = cluster_ctx.clb_nlist.block_type(clb); VTR_ASSERT(type->num_pins % type->capacity == 0); int num_basic_block_pins = type->num_pins / type->capacity; /* Logical location and physical location is offset by z * max_num_block_pins */ - clb_pin = pb_pin + place_ctx.block_locs[clb].loc.z * num_basic_block_pins; + clb_pin = pin + place_ctx.block_locs[clb].loc.z * num_basic_block_pins; } else { //No offset - clb_pin = pb_pin; + clb_pin = pin; } - VTR_ASSERT(clb_pin >= 0); + VTR_ASSERT(clb_pin >= 0); return clb_pin; } diff --git a/vpr/src/util/vpr_utils.h b/vpr/src/util/vpr_utils.h index 4b6cd5ff09e..bc53e60950e 100644 --- a/vpr/src/util/vpr_utils.h +++ b/vpr/src/util/vpr_utils.h @@ -2,7 +2,9 @@ #define VPR_UTILS_H #include +#include #include + #include "vpr_types.h" #include "atom_netlist.h" #include "clustered_netlist.h" diff --git a/vtr_flow/arch/equivalent_tiles/slice.xml b/vtr_flow/arch/equivalent_tiles/slice.xml new file mode 100644 index 00000000000..b8a16a781eb --- /dev/null +++ b/vtr_flow/arch/equivalent_tiles/slice.xml @@ -0,0 +1,1625 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + io_tile.in io_tile.out + io_tile.in io_tile.out + io_tile.in io_tile.out + io_tile.in io_tile.out + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + 0.068e-9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1 1 1 1 1 1 1 1 1 1 1 1 1 + 1 1 1 1 1 1 1 1 1 1 1 1 + + + diff --git a/vtr_flow/scripts/add_tiles.py b/vtr_flow/scripts/add_tiles.py new file mode 100755 index 00000000000..14794bae284 --- /dev/null +++ b/vtr_flow/scripts/add_tiles.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python + +""" +This script is intended to modify the architecture description file to be compliant with +the new format. + +It moves the top level pb_types attributes and tags to the tiles high-level tag. + +BEFORE: + + + + + + + + + + + + + + + +AFTER: + + + + + + + + + + + + + + + + + +""" + +from lxml import etree as ET +import argparse + +TAGS_TO_SWAP = ['fc', 'pinlocations', 'switchblock_locations'] +ATTR_TO_REMOVE = ['area', 'height', 'width', 'capacity'] + +def swap_tags(tile, pb_type): + # Moving tags from top level pb_type to tile + for child in pb_type: + if child.tag in TAGS_TO_SWAP: + pb_type.remove(child) + tile.append(child) + + +def main(): + parser = argparse.ArgumentParser( + description="Moves top level pb_types to tiles tag." + ) + parser.add_argument( + '--arch_xml', + required=True, + help="Input arch.xml that needs to be modified to move the top level pb_types to the `tiles` tag." 
+ ) + + args = parser.parse_args() + + arch_xml = ET.ElementTree() + root_element = arch_xml.parse(args.arch_xml) + + tiles = ET.SubElement(root_element, 'tiles') + + top_pb_types = [] + for pb_type in root_element.iter('pb_type'): + if pb_type.getparent().tag == 'complexblocklist': + top_pb_types.append(pb_type) + + for pb_type in top_pb_types: + tile = ET.SubElement(tiles, 'tile') + attrs = pb_type.attrib + + for attr in attrs: + tile.set(attr, pb_type.get(attr)) + + # Remove attributes of top level pb_types only + for attr in ATTR_TO_REMOVE: + pb_type.attrib.pop(attr, None) + + swap_tags(tile, pb_type) + + print(ET.tostring(arch_xml, pretty_print=True).decode('utf-8')) + + +if __name__ == '__main__': + main() diff --git a/vtr_flow/scripts/run_vtr_flow.pl b/vtr_flow/scripts/run_vtr_flow.pl index 1d59a1e9bac..c851dc4886a 100755 --- a/vtr_flow/scripts/run_vtr_flow.pl +++ b/vtr_flow/scripts/run_vtr_flow.pl @@ -416,7 +416,10 @@ #system "cp $odin2_base_config" my $architecture_file_path_new = "$temp_dir$architecture_file_name"; -copy( $architecture_file_path, $architecture_file_path_new ); +my $ret = `$vtr_flow_path/scripts/add_tiles.py --arch_xml $architecture_file_path > $architecture_file_path_new`; + +# There is no need to copy the arch decription file as it is produced by the add_tiles.py script +#copy( "$architecture_file_path", $architecture_file_path_new ); $architecture_file_path = $architecture_file_path_new; my $circuit_file_path_new = "$temp_dir$benchmark_name" . file_ext_for_stage($starting_stage - 1, $circuit_suffix); diff --git a/vtr_flow/scripts/upgrade_arch.py b/vtr_flow/scripts/upgrade_arch.py index ef6dd8f7310..64cba982360 100755 --- a/vtr_flow/scripts/upgrade_arch.py +++ b/vtr_flow/scripts/upgrade_arch.py @@ -39,6 +39,7 @@ def __init__(self): "upgrade_port_equivalence", "upgrade_complex_sb_num_conns", "add_missing_comb_model_internal_timing_edges", + "move_top_level_pb_type_to_tiles", ] def parse_args(): @@ -137,6 +138,11 @@ def main(): if result: modified = True + if "move_top_level_pb_type_to_tiles" in args.features: + result = move_top_level_pb_type_to_tiles(arch) + if result: + modified = True + if modified: if args.debug: root.write(sys.stdout, pretty_print=args.pretty) @@ -155,7 +161,7 @@ def add_model_timing(arch): #Find all primitive pb types prim_pbs = arch.findall(".//pb_type[@blif_model]") - #Build up the timing specifications from + #Build up the timing specifications from default_models = frozenset([".input", ".output", ".latch", ".names"]) primitive_timing_specs = {} for prim_pb in prim_pbs: @@ -237,7 +243,7 @@ def upgrade_fc_overrides(arch): port = old_pin_override.attrib['name'] fc_type = old_pin_override.attrib['fc_type'] fc_val = old_pin_override.attrib['fc_val'] - + fc_tag.remove(old_pin_override) new_attrib = OrderedDict() @@ -285,7 +291,7 @@ def upgrade_fc_overrides(arch): new_attrib["fc_val"] = out_val fc_override = ET.SubElement(fc_tag, "fc_override", attrib=new_attrib) - + changed = True return changed @@ -350,7 +356,7 @@ def upgrade_device_layout(arch): device_auto.attrib['height'] = height else: assert False, "Unrecognized specification" - + if 0: for type, locs in type_to_grid_specs.iteritems(): print "Type:", type @@ -370,7 +376,7 @@ def upgrade_device_layout(arch): device_auto.text = "\n" + 2*INDENT device_auto.tail = "\n" - + for type_name, locs in type_to_grid_specs.iteritems(): for loc in locs: assert loc.tag == "loc" @@ -408,8 +414,8 @@ def upgrade_device_layout(arch): col_spec.attrib['priority'] = str(priority) col_spec.tail = "\n" + 
2*INDENT - #Classic VPR fills blank spaces (e.g. where a height > 1 block won't fit) with "EMPTY" - #instead of with the underlying type. To replicate that we create a col spec with the same + #Classic VPR fills blank spaces (e.g. where a height > 1 block won't fit) with "EMPTY" + #instead of with the underlying type. To replicate that we create a col spec with the same #location information, but of type 'EMPTY' and with slightly lower priority than the real type. col_empty_spec = ET.SubElement(device_auto, 'col') @@ -451,8 +457,8 @@ def upgrade_device_layout(arch): col_spec.attrib['priority'] = str(priority) col_spec.tail = "\n" + 2*INDENT - #Classic VPR fills blank spaces (e.g. where a height > 1 block won't fit) with "EMPTY" - #instead of with the underlying type. To replicate that we create a col spec with the same + #Classic VPR fills blank spaces (e.g. where a height > 1 block won't fit) with "EMPTY" + #instead of with the underlying type. To replicate that we create a col spec with the same #location information, but of type 'EMPTY' and with slightly lower priority than the real type. col_empty_spec = ET.SubElement(device_auto, 'col') col_empty_spec.attrib['type'] = "EMPTY" @@ -496,7 +502,7 @@ def upgrade_device_layout(arch): assert False, "Unrecognzied type tag {}".format(loc_type) return changed - + def remove_io_chan_distr(arch): """ Removes the legacy '' channel width distribution tags @@ -631,7 +637,7 @@ def upgrade_connection_block_input_switch(arch): # #Create the switch # - + switch_name = "ipin_cblock" #Make sure the switch name doesn't already exist @@ -673,7 +679,7 @@ def upgrade_switch_types(arch): assert switchlist_tag is not None for switch_tag in switchlist_tag.findall("./switch"): - + switch_type = switch_tag.attrib['type'] if switch_type in ['buffered', 'pass_trans']: @@ -710,7 +716,7 @@ def rename_fc_attributes(arch): def remove_longline_sb_cb(arch): """ Drops and of any types with length="longline", - since we now assume longlines have full switch block/connection block + since we now assume longlines have full switch block/connection block populations """ @@ -867,5 +873,88 @@ def add_missing_comb_model_internal_timing_edges(arch): return changed +def move_top_level_pb_type_to_tiles(arch): + """ + This script is intended to modify the architecture description file to be compliant with + the new format. + + It moves the top level pb_types attributes and tags to the tiles high-level tag. 
+ + BEFORE: + + + + + + + + + + + + + + + + AFTER: + + + + + + + + + + + + + + + + + + """ + + changed = False + + TAGS_TO_SWAP = ['fc', 'pinlocations', 'switchblock_locations'] + ATTR_TO_REMOVE = ['area', 'height', 'width', 'capacity'] + + def swap_tags(tile, pb_type): + # Moving tags from top level pb_type to tile + for child in pb_type: + if child.tag in TAGS_TO_SWAP: + pb_type.remove(child) + tile.append(child) + + tiles = arch.find('tiles') + + if tiles is None: + tiles = ET.SubElement(arch, 'tiles') + + top_pb_types = [] + for pb_type in arch.iter('pb_type'): + if pb_type.getparent().tag == 'complexblocklist': + top_pb_types.append(pb_type) + + for pb_type in top_pb_types: + tile = ET.SubElement(tiles, 'tile') + attrs = pb_type.attrib + + for attr in attrs: + tile.set(attr, pb_type.get(attr)) + + # Remove attributes of top level pb_types only + for attr in ATTR_TO_REMOVE: + pb_type.attrib.pop(attr, None) + + swap_tags(tile, pb_type) + + changed = True + + return changed + if __name__ == "__main__": main() diff --git a/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_equivalent_tiles/config/config.txt b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_equivalent_tiles/config/config.txt new file mode 100644 index 00000000000..7ec5b84e895 --- /dev/null +++ b/vtr_flow/tasks/regression_tests/vtr_reg_strong/strong_equivalent_tiles/config/config.txt @@ -0,0 +1,31 @@ +############################################## +# Configuration file for running experiments +############################################## + +# Path to directory of circuits to use +circuits_dir=benchmarks/microbenchmarks + +# Path to directory of architectures to use +archs_dir=arch/equivalent_tiles + +# Path to directory of SDC files to use +sdc_dir = sdc + +# Add circuits to list to sweep +circuit_list_add=carry_chain.blif + +# Add architectures to list to sweep +arch_list_add=slice.xml + +# Parse info and how to parse +parse_file=vpr_standard.txt + +# How to parse QoR info +qor_parse_file=qor_standard.txt + +# Pass requirements +pass_requirements_file=pass_requirements.txt + +# Script parameters +#script_params="" +script_params = -track_memory_usage -lut_size 1 -starting_stage vpr
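A note on the lookahead header added near the top of this patch: Expansion_Cost_Entry only declares get_smallest_entry(), get_average_entry(), get_geomean_entry(), and get_median_entry(); their definitions are not part of this diff. The standalone sketch below shows one plausible way to compute the geometric-mean and median reductions over a vector of delay/congestion entries. The CostEntry struct and all values here are stand-ins, not VPR's actual implementation, and the sketch assumes a non-empty vector of positive, finite costs.

```cpp
#include <algorithm>
#include <cmath>
#include <iostream>
#include <vector>

// Stand-in for the Cost_Entry class declared in the lookahead header.
struct CostEntry {
    float delay;
    float congestion;
};

// Geometric mean of delay and congestion over all recorded entries.
static CostEntry geomean_entry(const std::vector<CostEntry>& entries) {
    double log_delay_sum = 0.0, log_cong_sum = 0.0;
    for (const CostEntry& e : entries) {
        log_delay_sum += std::log(e.delay);
        log_cong_sum += std::log(e.congestion);
    }
    double n = static_cast<double>(entries.size());
    return {static_cast<float>(std::exp(log_delay_sum / n)),
            static_cast<float>(std::exp(log_cong_sum / n))};
}

// Entry whose delay is the median of the recorded delays.
static CostEntry median_entry(std::vector<CostEntry> entries) {
    auto mid = entries.begin() + entries.size() / 2;
    std::nth_element(entries.begin(), mid, entries.end(),
                     [](const CostEntry& a, const CostEntry& b) { return a.delay < b.delay; });
    return *mid;
}

int main() {
    std::vector<CostEntry> costs = {{1.0f, 3.0f}, {2.0f, 2.0f}, {4.0f, 1.0f}};
    std::cout << "geomean delay = " << geomean_entry(costs).delay   // 2
              << ", median delay = " << median_entry(costs).delay   // 2
              << "\n";
    return 0;
}
```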
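The same header's PQ_Entry deliberately inverts operator< ("inserted into max priority queue so want queue entries with a lower cost to be greater"): std::priority_queue is a max-heap, so defining "less than" as "higher cost" makes it pop the lowest-cost node first, which is what the Dijkstra expansion needs. A tiny standalone check of that trick, with a simplified Entry type rather than the real PQ_Entry:

```cpp
#include <iostream>
#include <queue>

struct Entry {
    int node;
    float cost;
    // Inverted comparison: a max-heap ordered this way pops the smallest cost first.
    bool operator<(const Entry& other) const { return cost > other.cost; }
};

int main() {
    std::priority_queue<Entry> pq;
    pq.push({1, 3.0f});
    pq.push({2, 1.0f});
    pq.push({3, 2.0f});

    while (!pq.empty()) {
        std::cout << "node " << pq.top().node << " cost " << pq.top().cost << "\n";
        pq.pop(); // costs come out in ascending order: 1, 2, 3
    }
    return 0;
}
```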
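Finally, the vpr_error changes in this patch reroute the VPR_THROW macro through vpr_throw_opt(), which downgrades the error to a warning whenever the calling function has been registered via map_error_activation_status() (the macro now passes __func__ so the call site identifies itself). The sketch below is a simplified, self-contained analogue of that flow; the logging call, error type, and function names are stand-ins, not VPR's actual APIs.

```cpp
#include <cstdio>
#include <stdexcept>
#include <string>
#include <unordered_set>

// Functions whose fatal errors should be reported as warnings instead.
static std::unordered_set<std::string> demoted_functions;

static void demote_errors_in(const std::string& function_name) {
    demoted_functions.insert(function_name);
}

// Log a warning if the caller is registered, otherwise throw as usual.
static void throw_or_warn(const char* func, const char* file, int line, const std::string& msg) {
    if (demoted_functions.count(func)) {
        std::fprintf(stderr, "Warning %s:%d %s: %s\n", file, line, func, msg.c_str());
    } else {
        throw std::runtime_error(msg);
    }
}

// Macro counterpart of VPR_THROW: capture the call site automatically.
#define THROW_OR_WARN(msg) throw_or_warn(__func__, __FILE__, __LINE__, (msg))

static void sync_grid_to_blocks_demo() {
    THROW_OR_WARN("block type mismatch"); // demoted to a warning by the call in main()
}

int main() {
    demote_errors_in("sync_grid_to_blocks_demo");
    sync_grid_to_blocks_demo(); // prints a warning instead of throwing
    return 0;
}
```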