Skip to content

Commit 46faff2

Browse files
committed
[Odin]: cleaning up the old soft mult node after splitting
Signed-off-by: Seyed Alireza Damghani <[email protected]>
1 parent 0121c41 commit 46faff2

10 files changed

+63
-9
lines changed

ODIN_II/.gitattributes

Lines changed: 0 additions & 1 deletion
This file was deleted.

ODIN_II/SRC/multipliers.cpp

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ void split_soft_multiplier(nnode_t* node, netlist_t* netlist);
6161
static mult_port_stat_e is_constant_multipication(nnode_t* node, netlist_t* netlist);
6262
static signal_list_t* implement_constant_multipication(nnode_t* node, mult_port_stat_e port_status, short mark, netlist_t* netlist);
6363
static nnode_t* perform_const_mult_optimization(mult_port_stat_e mult_port_stat, nnode_t* node, uintptr_t traverse_mark_number, netlist_t* netlist);
64+
static void cleanup_mult_old_node(nnode_t* nodeo, netlist_t* netlist);
6465

6566
// data structure representing a row of bits an adder tree
6667
struct AdderTreeRow {
@@ -1527,15 +1528,9 @@ void split_soft_multiplier(nnode_t* node, netlist_t* netlist) {
15271528
}
15281529
}
15291530

1530-
// Probably more to do here in freeing the old node!
1531-
vtr::free(node->name);
1532-
vtr::free(node->input_port_sizes);
1533-
vtr::free(node->output_port_sizes);
1531+
// CLEAN UP
1532+
cleanup_mult_old_node(node, netlist);
15341533

1535-
// Free arrays NOT the pins since relocated!
1536-
vtr::free(node->input_pins);
1537-
vtr::free(node->output_pins);
1538-
vtr::free(node);
15391534
}
15401535

15411536
/**
@@ -1757,6 +1752,58 @@ void clean_multipliers() {
17571752
return;
17581753
}
17591754

1755+
/**
1756+
* -------------------------------------------------------------------------
1757+
* (function: cleanup_mult_old_node)
1758+
*
1759+
* @brief <clean up nodeo, a high level MULT node>
1760+
* In the split_soft_multiplier function, nodeo is split into smaller multipliers.
1761+
* Because of the complexity of the input pin connections, the input pins were not
1762+
* remapped to the new nodes; they were only copied and added to them. This function
1763+
* detaches the input pins from nodeo (without freeing them). For every output pin
1764+
* still attached to a node, it joins the pin's net into the zero (GND) net when that
1765+
* net has at most one driver, frees the pin and detaches it from nodeo to avoid a
1766+
* memory leak. Finally, nodeo itself is released with free_nnode().
1767+
* @param nodeo representing the old multiplier node (must not be used after this call)
1768+
* @param netlist representing the current netlist
1769+
*-----------------------------------------------------------------------*/
1770+
static void cleanup_mult_old_node(nnode_t* nodeo, netlist_t* netlist) {
1771+
int i;
1772+
/* Clear nodeo's input pin slots only; the pins themselves are not freed here since the new nodes still use them */
1773+
for (i = 0; i < nodeo->num_input_pins; i++) {
1774+
nodeo->input_pins[i] = NULL;
1775+
}
1776+
1777+
/* Tie each remaining output pin's net to the zero (GND) net, then free the pin */
1778+
for (i = 0; i < nodeo->num_output_pins; i++) {
1779+
npin_t* output_pin = nodeo->output_pins[i];
1780+
1781+
if (output_pin && output_pin->node) {
1782+
/* get a pin from get_zero_pin() and remember its pin_net_idx so its fanout slot can be cleared below */
1783+
npin_t* zero_pin = get_zero_pin(netlist);
1784+
int idx_2_buffer = zero_pin->pin_net_idx;
1785+
1786+
// Only merge when the output net has at most one driver. NOTE(review): otherwise zero_pin is still freed below without its fanout slot being cleared - confirm this cannot dangle
1787+
if (output_pin->net->num_driver_pins <= 1) {
1788+
/* presumably moves the fanouts of the output net onto the zero pin's net - confirm against join_nets */
1789+
join_nets(zero_pin->net, output_pin->net);
1790+
1791+
/* clear the fanout slot on zero_pin's net that held zero_pin, since zero_pin is freed below */
1792+
zero_pin->net->fanout_pins[idx_2_buffer] = NULL;
1793+
}
1794+
1795+
free_npin(zero_pin);
1796+
free_npin(output_pin);
1797+
1798+
/* Drop nodeo's reference to the output pin that was just freed */
1799+
nodeo->output_pins[i] = NULL;
1800+
}
1801+
}
1802+
1803+
// Release the old node itself
1804+
free_nnode(nodeo);
1805+
}
1806+
17601807
void free_multipliers() {
17611808
if (hard_multipliers && hard_multipliers->instances) {
17621809
t_multiplier* tmp = (t_multiplier*)hard_multipliers->instances;

vtr_flow/benchmarks/verilog/koios/attention_layer.v

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
//`define SIMULATION_MEMORY
66
//`define SIMULATION_addfp
7+
78
`define VECTOR_DEPTH 64 //Q,K,V vector size
89
`define DATA_WIDTH 16
910
`define VECTOR_BITS 1024 // 16 bit each (16x64)

vtr_flow/benchmarks/verilog/koios/conv_layer_hls.v

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
// Abridged for VTR by: Daniel Rauch
1919
//////////////////////////////////////////////////////////////////////////////
2020

21+
2122
module dpram (
2223

2324
    clk,

vtr_flow/benchmarks/verilog/koios/dla_like.medium.v

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
//4. Double-buffering after each layer.
1616
///////////////////////////////////////////////////////////////////////////////
1717

18+
1819
module DLA (
1920
input clk,
2021
input i_reset,

vtr_flow/benchmarks/verilog/koios/dla_like.small.v

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
//4. Double-buffering after each layer.
1616
///////////////////////////////////////////////////////////////////////////////
1717

18+
1819
module DLA (
1920
input clk,
2021
input i_reset,

vtr_flow/benchmarks/verilog/koios/eltwise_layer.v

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656
//section by section. The number of rows will be programmed
5757
//in the "iterations" register in the design.
5858

59+
5960
`define BFLOAT16
6061

6162
// IEEE Half Precision => EXPONENT = 5, MANTISSA = 10

vtr_flow/benchmarks/verilog/koios/gemm_layer.v

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
// with a simpler DSP (just a fixed point multiplier) like in the
2020
// flagship arch timing/k6_frac_N10_frac_chain_depop50_mem32K_40nm.xml
2121
/////////////////////////////////////////////////////////////////////////
22+
2223
`define BFLOAT16
2324

2425
// IEEE Half Precision => EXPONENT = 5, MANTISSA = 10

vtr_flow/benchmarks/verilog/koios/softmax.v

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
//////////////////////////////////////////////////////////////////////////////
1515

1616
//softmax_p8_smem_rfloat16_alut_v512_b2_-0.1_0.1.v
17+
1718
`ifndef DEFINES_DONE
1819
`define DEFINES_DONE
1920
`define EXPONENT 5

vtr_flow/benchmarks/verilog/koios/tiny_darknet_like.small.v

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
//////////////////////////////////////////////////////////////////////////////
1717

1818
`timescale 1 ns / 1 ps
19+
1920
module td_fused_top_Block_entry_proc_proc392 (
2021
ap_clk,
2122
ap_rst,

0 commit comments

Comments
 (0)