@@ -11,16 +11,12 @@ Problem statement: There are 12 robots on a board with 12 regions. The aim is fo
11
11
Q learning accelerator generated using HDL coder (Simulink / MATLAB)
12
12
4 actions
13
13
12 states
14
-
15
14
The design consists of 2 policy generators.
16
15
A random number generator is used as a policy generator during training. (mode = 0)
17
16
A policy generator where the action is based on the Maximum value of Q is used during inference. (mode = 1)
18
-
19
-
20
17
Misc:
21
18
ufix6_En4 : 6 bit fixed point number --> 2 bits for integer, 4 bits for the fractional part.
22
19
alpha = gamma = 0.5
23
-
24
20
*/
25
21
// //////////////////////////////////////////////////////////////////////////////////////
26
22
@@ -141,26 +137,19 @@ module Max
141
137
wire [31:0] in0 [0:3]; // int32 [4]
142
138
wire [31:0] Max_stage1_val [0:1]; // int32 [2]
143
139
wire [31:0] Max_stage2_val; // int32
144
-
145
-
146
140
assign in0[0] = in0_0;
147
141
assign in0[1] = in0_1;
148
142
assign in0[2] = in0_2;
149
143
assign in0[3] = in0_3;
150
-
151
144
// ---- Tree max implementation ----
152
145
// ---- Tree max stage 1 ----
153
146
assign Max_stage1_val[0] = (in0[0] >= in0[1] ? in0[0] :
154
147
in0[1]);
155
148
assign Max_stage1_val[1] = (in0[2] >= in0[3] ? in0[2] :
156
149
in0[3]);
157
-
158
-
159
-
160
150
// ---- Tree max stage 2 ----
161
151
assign Max_stage2_val = (Max_stage1_val[0] >= Max_stage1_val[1] ? Max_stage1_val[0] :
162
152
Max_stage1_val[1]);
163
-
164
153
*/
165
154
166
155
// wire [31:0] in0[0:3]; // int32 [4]
@@ -194,8 +183,8 @@ endmodule // Max
194
183
195
184
196
185
197
- // Simpledual_port_ram_generic : 4 RAM banks ( = no. of actions) with a depth of 12 ( = no. of states). Writes during training. Reads during inferfence.
198
- module Simpledual_port_ram_generic
186
+ // SimpleDualPortRAM_generic : 4 RAM banks ( = no. of actions) with a depth of 12 ( = no. of states). Writes during training. Reads during inference.
187
+ module SimpleDualPortRAM_generic
199
188
(clk,
200
189
enb,
201
190
wr_din,
@@ -235,7 +224,6 @@ module Simpledual_port_ram_generic
235
224
ram[2] = 32'h00000000;
236
225
ram[1] = 32'h00000000;
237
226
ram[0] = 32'h00000000;
238
-
239
227
data_int = 32'h00000000;
240
228
end
241
229
*/
@@ -275,7 +263,7 @@ dual_port_ram u_dual_port_ram(
275
263
276
264
assign rd_dout = data_int;
277
265
278
- endmodule // Simpledual_port_ram_generic
266
+ endmodule // SimpleDualPortRAM_generic
279
267
280
268
// Q_HW: connects all the blocks and incorporates pipelining for appropriate syncing.
281
269
module Q_HW
@@ -545,7 +533,7 @@ assign Data_Type_Conversion_out1_3 = Data_Type_Conversion_out1_3;
545
533
.out0(Max_out1), // int16
546
534
.clk(clk));
547
535
548
- Simpledual_port_ram_generic #(.AddrWidth(4 ),
536
+ SimpleDualPortRAM_generic #(.AddrWidth(4 ),
549
537
.DataWidth(32 )
550
538
)
551
539
u_Simple_Dual_Port_RAM_System_bank3 (.clk(clk),
@@ -557,7 +545,7 @@ assign Data_Type_Conversion_out1_3 = Data_Type_Conversion_out1_3;
557
545
.rd_dout(pre_rd_out)
558
546
);
559
547
560
- Simpledual_port_ram_generic #(.AddrWidth(4 ),
548
+ SimpleDualPortRAM_generic #(.AddrWidth(4 ),
561
549
.DataWidth(32 )
562
550
)
563
551
u_Simple_Dual_Port_RAM_System_bank2 (.clk(clk),
@@ -569,7 +557,7 @@ assign Data_Type_Conversion_out1_3 = Data_Type_Conversion_out1_3;
569
557
.rd_dout(pre_rd_out_1)
570
558
);
571
559
572
- Simpledual_port_ram_generic #(.AddrWidth(4 ),
560
+ SimpleDualPortRAM_generic #(.AddrWidth(4 ),
573
561
.DataWidth(32 )
574
562
)
575
563
u_Simple_Dual_Port_RAM_System_bank1 (.clk(clk),
@@ -5090,4 +5078,3 @@ robot_high_level robot_12 ( 4'd11, clk, reset, mode, Q_12);
5090
5078
endmodule
5091
5079
5092
5080
5093
-
0 commit comments