diff --git a/ODIN_II/SRC/BLIFElaborate.cpp b/ODIN_II/SRC/BLIFElaborate.cpp index 5f45b962c41..2200043a959 100644 --- a/ODIN_II/SRC/BLIFElaborate.cpp +++ b/ODIN_II/SRC/BLIFElaborate.cpp @@ -485,7 +485,7 @@ static void resolve_arithmetic_nodes(nnode_t* node, uintptr_t traverse_mark_numb if (!hard_multipliers) check_constant_multipication(node, traverse_mark_number, netlist); else - check_multiplier_port_size(node); + check_multiplier_port_size(node, netlist); /* Adding to mult_list for future checking on hard blocks */ mult_list = insert_in_vptr_list(mult_list, node); diff --git a/ODIN_II/SRC/include/multipliers.h b/ODIN_II/SRC/include/multipliers.h index e22309a3638..7c8acae238e 100644 --- a/ODIN_II/SRC/include/multipliers.h +++ b/ODIN_II/SRC/include/multipliers.h @@ -56,7 +56,7 @@ extern void define_mult_function(nnode_t* node, FILE* out); extern void split_multiplier(nnode_t* node, int a0, int b0, int a1, int b1, netlist_t* netlist); extern void iterate_multipliers(netlist_t* netlist); extern bool check_constant_multipication(nnode_t* node, uintptr_t traverse_mark_number, netlist_t* netlist); -extern void check_multiplier_port_size(nnode_t* node); +extern void check_multiplier_port_size(nnode_t* node, netlist_t* netlist); extern bool is_ast_multiplier(ast_node_t* node); extern void clean_multipliers(); extern void free_multipliers(); diff --git a/ODIN_II/SRC/multipliers.cpp b/ODIN_II/SRC/multipliers.cpp index 6791ed5b68e..4cca645ad58 100644 --- a/ODIN_II/SRC/multipliers.cpp +++ b/ODIN_II/SRC/multipliers.cpp @@ -1765,68 +1765,41 @@ bool is_ast_multiplier(ast_node_t* node) { * ------------------------------------------------------------------------- * (function: check_multiplier_port_size) * - * If output size is less than the max of input sizes, the inputs should - * be pruned since the most significant bits are useless + * If output size is less than the sum of input sizes, + * we need to expand output pins with pad pins * - * @param node pointer to the 
multiplication node + * @param node pointer to the multiplication node + * @param netlist pointer to the current netlist * ----------------------------------------------------------------------- */ -void check_multiplier_port_size(nnode_t* node) { +void check_multiplier_port_size(nnode_t* node, netlist_t* netlist) { /* Can only perform the optimisation if hard multipliers exist! */ if (hard_multipliers == NULL) return; int mula = node->input_port_sizes[0]; int mulb = node->input_port_sizes[1]; - int max = std::max(mula, mulb); + int sizeout = node->num_output_pins; + int limit = mula + mulb; /* check the output port size */ - if (node->num_output_pins < max) { - int limit = node->num_output_pins; - int new_mula = (mula > limit) ? limit : mula; - int new_mulb = (mulb > limit) ? limit : mulb; - npin_t** new_input_pins = (npin_t**)vtr::calloc(new_mula + new_mulb, sizeof(npin_t*)); - - /* handle mula */ - for (int i = 0; i < mula; i++) { - npin_t* input_pin = node->input_pins[i]; - /* detach from mul node */ - node->input_pins[i] = NULL; - - if (i < new_mula) { - new_input_pins[i] = input_pin; - } else { - /* detach from its net */ - remove_fanout_pins_from_net(input_pin->net, input_pin, input_pin->pin_net_idx); - /* free the pin */ - input_pin->node = NULL; - free_npin(input_pin); - } - } - - /* handle mulb */ - for (int i = 0; i < mulb; i++) { - npin_t* input_pin = node->input_pins[node->input_port_sizes[0] + i]; - /* detach from mul node */ - node->input_pins[node->input_port_sizes[0] + i] = NULL; - - if (i < new_mulb) { - new_input_pins[new_mula + i] = input_pin; - } else { - /* detach from its net */ - remove_fanout_pins_from_net(input_pin->net, input_pin, input_pin->pin_net_idx); - /* free the pin */ - input_pin->node = NULL; - free_npin(input_pin); - } + if (node->num_output_pins < limit) { + // Set the limit value as the number of output pins + node->num_output_pins = limit; + node->output_port_sizes[0] = limit; + // Keep record of old output pins pointer 
for cleaning up later + npin_t** old_output_pins = node->output_pins; + node->output_pins = (npin_t**)vtr::calloc(node->num_output_pins, sizeof(npin_t*)); + + // Move output pins to the new array and add pad pins in the extra spots + for (int i = 0; i < node->num_output_pins; i++) { + if (i < sizeout) + node->output_pins[i] = old_output_pins[i]; + else + add_output_pin_to_node(node, get_pad_pin(netlist), i); /* NOTE(review): confirm a pin returned by get_pad_pin() is valid to attach as an output pin */ } - - /* free old input pin list */ - vtr::free(node->input_pins); - node->input_pins = new_input_pins; - node->num_input_pins = new_mula + new_mulb; - node->input_port_sizes[0] = new_mula; - node->input_port_sizes[1] = new_mulb; + // CLEAN UP + vtr::free(old_output_pins); } } /*------------------------------------------------------------------------- diff --git a/ODIN_II/regression_test/benchmark/task/yosys+odin/koios/synthesis_result.json b/ODIN_II/regression_test/benchmark/task/yosys+odin/koios/synthesis_result.json index 0645e8d7078..ab2da506c95 100644 --- a/ODIN_II/regression_test/benchmark/task/yosys+odin/koios/synthesis_result.json +++ b/ODIN_II/regression_test/benchmark/task/yosys+odin/koios/synthesis_result.json @@ -4838,16 +4838,16 @@ "Latch Drivers": 1, "Pi": 2, "Po": 384, - "logic element": 7498, - "latch": 1876, - "Adder": 1188, + "logic element": 9718, + "latch": 2596, + "Adder": 1788, "Multiplier": 24, "Memory": 48, "generic logic size": 4, - "Longest Path": 129, + "Longest Path": 139, "Average Path": 4, - "Estimated LUTs": 7718, - "Total Node": 10635 + "Estimated LUTs": 9938, + "Total Node": 14175 }, "koios/softmax/k6FracN10LB_mem20K_complexDSP_customSB_22nm": { "test_name": "koios/softmax/k6FracN10LB_mem20K_complexDSP_customSB_22nm", diff --git a/ODIN_II/regression_test/benchmark/task/yosys+odin/large/synthesis_result.json b/ODIN_II/regression_test/benchmark/task/yosys+odin/large/synthesis_result.json index cdc8df6c636..a95180c3d6f 100644 --- a/ODIN_II/regression_test/benchmark/task/yosys+odin/large/synthesis_result.json +++ 
b/ODIN_II/regression_test/benchmark/task/yosys+odin/large/synthesis_result.json @@ -371,15 +371,15 @@ "Latch Drivers": 1, "Pi": 235, "Po": 305, - "logic element": 2927, - "latch": 1019, - "Adder": 411, + "logic element": 2938, + "latch": 1024, + "Adder": 413, "Multiplier": 15, "generic logic size": 4, "Longest Path": 110, "Average Path": 4, - "Estimated LUTs": 2984, - "Total Node": 4373 + "Estimated LUTs": 2995, + "Total Node": 4391 }, "large/paj_top_hierarchy_no_mem/k6_frac_N10_frac_chain_mem32K_40nm": { "test_name": "large/paj_top_hierarchy_no_mem/k6_frac_N10_frac_chain_mem32K_40nm", @@ -409,16 +409,16 @@ "Latch Drivers": 1, "Pi": 235, "Po": 305, - "logic element": 2705, - "latch": 1027, - "Adder": 411, + "logic element": 2716, + "latch": 1032, + "Adder": 413, "Multiplier": 15, "Memory": 21, "generic logic size": 4, "Longest Path": 111, "Average Path": 4, - "Estimated LUTs": 2764, - "Total Node": 4180 + "Estimated LUTs": 2775, + "Total Node": 4198 }, "large/spree/k6_frac_N10_frac_chain_mem32K_40nm": { "test_name": "large/spree/k6_frac_N10_frac_chain_mem32K_40nm", @@ -433,7 +433,7 @@ "Latch Drivers": 1, "Pi": 44, "Po": 32, - "logic element": 2640, + "logic element": 2656, "latch": 224, "Adder": 62, "Multiplier": 1, @@ -441,8 +441,8 @@ "generic logic size": 4, "Longest Path": 764, "Average Path": 3, - "Estimated LUTs": 2904, - "Total Node": 3056 + "Estimated LUTs": 2920, + "Total Node": 3072 }, "large/sv_chip0_hierarchy_no_mem/k6_frac_N10_frac_chain_mem32K_40nm": { "test_name": "large/sv_chip0_hierarchy_no_mem/k6_frac_N10_frac_chain_mem32K_40nm", diff --git a/ODIN_II/regression_test/benchmark/task/yosys+odin/micro/simulation_result.json b/ODIN_II/regression_test/benchmark/task/yosys+odin/micro/simulation_result.json index 511af4d9809..73b3ee1f03d 100644 --- a/ODIN_II/regression_test/benchmark/task/yosys+odin/micro/simulation_result.json +++ b/ODIN_II/regression_test/benchmark/task/yosys+odin/micro/simulation_result.json @@ -208,7 +208,7 @@ "simulation_time(ms)": 
2335.8, "test_coverage(%)": 100, "Latch Drivers": 1, - "Pi": 528, + "Pi": 608, "Po": 144, "logic element": 232, "latch": 144, @@ -248,7 +248,7 @@ "simulation_time(ms)": 2281.1, "test_coverage(%)": 100, "Latch Drivers": 1, - "Pi": 528, + "Pi": 608, "Po": 144, "logic element": 496, "latch": 144, diff --git a/ODIN_II/regression_test/benchmark/task/yosys+odin/micro/synthesis_result.json b/ODIN_II/regression_test/benchmark/task/yosys+odin/micro/synthesis_result.json index cd693d71c81..d16153a37fd 100644 --- a/ODIN_II/regression_test/benchmark/task/yosys+odin/micro/synthesis_result.json +++ b/ODIN_II/regression_test/benchmark/task/yosys+odin/micro/synthesis_result.json @@ -230,7 +230,7 @@ "techmap_time(ms)": 4.3, "synthesis_time(ms)": 234, "Latch Drivers": 1, - "Pi": 528, + "Pi": 608, "Po": 144, "logic element": 88, "latch": 144, @@ -274,7 +274,7 @@ "techmap_time(ms)": 7.3, "synthesis_time(ms)": 214, "Latch Drivers": 1, - "Pi": 528, + "Pi": 608, "Po": 144, "logic element": 352, "latch": 144, diff --git a/ODIN_II/regression_test/benchmark/task/yosys+odin/vtr/synthesis_result.json b/ODIN_II/regression_test/benchmark/task/yosys+odin/vtr/synthesis_result.json index 224a6f50b47..cf1e28e8e62 100644 --- a/ODIN_II/regression_test/benchmark/task/yosys+odin/vtr/synthesis_result.json +++ b/ODIN_II/regression_test/benchmark/task/yosys+odin/vtr/synthesis_result.json @@ -446,16 +446,16 @@ "Latch Drivers": 1, "Pi": 235, "Po": 305, - "logic element": 2705, - "latch": 1027, - "Adder": 411, + "logic element": 2716, + "latch": 1032, + "Adder": 413, "Multiplier": 15, "Memory": 21, "generic logic size": 4, "Longest Path": 111, "Average Path": 4, - "Estimated LUTs": 2764, - "Total Node": 4180 + "Estimated LUTs": 2775, + "Total Node": 4198 }, "vtr/sha/k6_frac_N10_frac_chain_mem32K_40nm": { "test_name": "vtr/sha/k6_frac_N10_frac_chain_mem32K_40nm", @@ -527,7 +527,7 @@ "Latch Drivers": 1, "Pi": 44, "Po": 32, - "logic element": 2640, + "logic element": 2656, "latch": 224, "Adder": 62, 
"Multiplier": 1, @@ -535,8 +535,8 @@ "generic logic size": 4, "Longest Path": 764, "Average Path": 3, - "Estimated LUTs": 2904, - "Total Node": 3056 + "Estimated LUTs": 2920, + "Total Node": 3072 }, "vtr/stereovision0/k6_frac_N10_frac_chain_mem32K_40nm": { "test_name": "vtr/stereovision0/k6_frac_N10_frac_chain_mem32K_40nm",