Oliver Jaison 4 vuotta sitten
vanhempi
commit
a67e2bb430
11 muutettua tiedostoa jossa 453 lisäystä ja 358 poistoa
  1. 1 0
      .gitignore
  2. 9 7
      Makefile
  3. 14 5
      readme.md
  4. 43 11
      simulation/modelsim/sim_neural.do
  5. 24 0
      simulation/modelsim/wave_floating32_tb.do
  6. 62 110
      src/FPA_module_test.sv
  7. 43 41
      src/fpu32/adder.v
  8. 1 1
      src/fpu32/fpu32.sv
  9. 48 45
      src/fpu32/mult.v
  10. 81 123
      src/neural/comp.sv
  11. 127 15
      src/neural/neuron.sv

+ 1 - 0
.gitignore

@@ -11,6 +11,7 @@
 !*.sv
 !*.py
 !sim_*.do
+!wave_*_tb.do
 !*.qip
 
 # Making sure nothing from there will be picked up

+ 9 - 7
Makefile

@@ -31,11 +31,13 @@ QUARTUS_MACROS = --set VERILOG_MACRO="SYNTHESIS=1"
 VSIM_ARGS = -L altera_ver -L lpm_ver -L sgate_ver -L altera_mf_ver -L altera_lnsim_ver -L cycloneive_ver -voptargs="+acc"
 
 
-### Optional parameters
-tb_file ?=
-tb_dir = $(dirname "${testbench_file}")
-tb_mod ?=
-do_file ?=
+### VERILOG SOURCE FILES
+## Collects src/*.sv plus, for every subdirectory of src/ holding .sv files,
+## src/{MOD_NAME}/{MOD_NAME}.sv and src/{MOD_NAME}/include.sv when present.
+## Assigned with := and built-in functions so no shell runs per expansion.
+###
+VERILOG_SRC := $(foreach SRC,$(sort $(dir $(wildcard ./src/*/*.sv))),$(wildcard $(SRC)$(notdir $(patsubst %/,%,$(SRC))).sv $(SRC)include.sv)) $(wildcard ./src/*.sv)
+$(info VERILOG_SRC=$(VERILOG_SRC))
 SIM_DIR ?= ./simulation/modelsim
 
 ### ================================================================
@@ -57,8 +59,8 @@ modelsim_cli:
 %.gui: ${SIM_DIR}/%.do
 	${MODELSIM_BIN} -gui -do "$<"
 
-testbench:
-	${MODELSIM_BIN} -c -do "vlog -sv +incdir+${tb_dir} {${tb_file}}; vsim -t 1ps ${VSIM_ARGS} ${tb_mod}; run -all"
+%_tb:
+	${MODELSIM_BIN} -gui -do "$(foreach SRC,$(VERILOG_SRC),vlog -sv {${SRC}};) vsim -t 1ps ${VSIM_ARGS} ${@}; if { [file exists ${SIM_DIR}/wave_${@}.do ] == 1} { do ${SIM_DIR}/wave_${@}.do }"
 
 sim_fpa_mod.do:
 	cd ./simulation/modelsim && ${MODELSIM_BIN} -gui -do $@

+ 14 - 5
readme.md

@@ -14,12 +14,19 @@ make modelsim
 
 ### Running testbench
 
-This will run test testbench in console without opening modelsim GUI
+Running the testbench directly in the GUI:
 ```bash
-make tb_file=${file} tb_mod=${module} testbench
-# Example
-make tb_file=./src/root.sv tb_mod=root_tb testbench
+make my_module_tb
 ```
+This includes all modules from src/*.sv and from subdirectories that contain a __main__ SystemVerilog file with the same name as 
+the subdirectory, or a file named include.sv.
+
+Any other SystemVerilog files in a subdirectory can be included using `` `_include {FILE.sv} `` in the subdirectory's main file. 
+
+This command will also include saved wave instructions that are located in **simulation/modelsim/wave_${my_module_tb}.do** 
+
+
+### Other testbench methods
 Running testbench with defined simulation tcl script.
 Scripts have to be located in **simulation/modelsim/sim_\*.do**
 ```bash
@@ -27,4 +34,6 @@ Scripts has be located in **simulation/modelsim/sim_\*.do**
 make sim_root_tb.gui
 # Without GUI
 make sim_root_tb.cli
-```
+```
+
+

Tiedoston diff-näkymää rajattu, sillä se on liian suuri
+ 43 - 11
simulation/modelsim/sim_neural.do


+ 24 - 0
simulation/modelsim/wave_floating32_tb.do

@@ -0,0 +1,24 @@
+onerror {resume}
+quietly WaveActivateNextPane {} 0
+add wave -noupdate -label INPUT_A -radix float32 /floating32_tb/input_a
+add wave -noupdate -label INPUT_B -radix float32 /floating32_tb/input_b
+add wave -noupdate -label RESULT_ADD -radix float32 /floating32_tb/result_add
+add wave -noupdate -label RESULT_MULT -radix float32 /floating32_tb/result_mult
+TreeUpdate [SetDefaultTree]
+WaveRestoreCursors {{Cursor 1} {0 ps} 0}
+quietly wave cursor active 0
+configure wave -namecolwidth 150
+configure wave -valuecolwidth 100
+configure wave -justifyvalue left
+configure wave -signalnamewidth 0
+configure wave -snapdistance 10
+configure wave -datasetprefix 0
+configure wave -rowmargin 4
+configure wave -childrowmargin 2
+configure wave -gridoffset 0
+configure wave -gridperiod 1
+configure wave -griddelta 40
+configure wave -timeline 0
+configure wave -timelineunits ns
+update
+WaveRestoreZoom {0 ps} {47 ps}

+ 62 - 110
src/FPA_module_test.sv

@@ -1,204 +1,156 @@
-module floating_add #(parameter N=16, M=4)(input_1, input_2, sum, diff, clk, reset);
+module floating_add #(parameter N=16, M=4)(input_1, input_2, sum, diff);
 	input logic [N-1:0] input_1, input_2;
-	input logic clk, reset;
 	output logic [N-1:0] sum;
 	output logic [M:0] diff;
 
-//	logic flag_a;
-//	logic flag_b;
-//	logic [M:0] abs;
-//	logic [N-3-M:0] res;
-	logic [N-1:0] D0 [7:0];
-	logic [N-1:0] Q0 [7:0];
-	logic [N-1:0] Q1 [7:0];
-	logic [N-1:0] Q2 [7:0];
+	logic flag_a;
+	logic flag_b;
+	logic [M:0] abs;
+	logic [N-3-M:0] res;
 	
 	// sign_x = x[N-1]
 	// exponent_x = x[N-2:N-2-M]
 	// mantissa_x = x[N-3-M:0]
 	
-	// Pipeline stage 0
-	always_comb
-		begin
-			D0[0] = input_1;
-			D0[1] = input_2;
-			D0[2] = 0 // sum
-			D0[3] = 0 // diff
-			D0[4] = 0 // flag_a
-			Do[5] = 0 // flag_b
-			D0[6] = 0 // abs
-			D0[7] = 0 // res
-		end
-		
-		pipe pipe_0(.clk(clk), .reset(reset), .D(D0), .Q(Q0));
-	
 	always_comb
 		begin
-			if (Q0[0][N-2:N-2-M] > Q0[1][N-2:N-2-M]) // If input 1 has the bigger exponent 
+			if (input_1[N-2:N-2-M] > input_2[N-2:N-2-M]) // If input 1 has the bigger exponent 
 				begin
 					// Flags input a as larger and calculates the absolute difference
-					Q0]4] = 1;
-					Q0[5] = 0;
-					Q0[6] = Q0[0][N-2:N-2-M] - Q0[1][N-2:N-2-M];
+					flag_a = 1;
+					flag_b = 0;
+					abs = input_1[N-2:N-2-M] - input_2[N-2:N-2-M];
 					// ASsigning overall sign of the output
-					Q0[2][N-1] = Q0[0][N-1];
+					sum[N-1] = input_1[N-1];
 					// Sets output to have the same exponent
-					Q0[2][N-2:N-2-M] = Q0[0][N-2:N-2-M];
+					sum[N-2:N-2-M] = input_1[N-2:N-2-M];
 				end
-			else if (Q0[1][N-2:N-2-M] > Q0[0][N-2:N-2-M]) // If input 2 has the bigger exponent
+			else if (input_2[N-2:N-2-M] > input_1[N-2:N-2-M]) // If input 2 has the bigger exponent
 				begin
 					// Similarly flags input b as larger and calculates the absolute difference
-					Q0[4] = 0;
-					Q0[5] = 1;
-					Q0[6] = Q0[1][N-2:N-2-M] - Q0[0][N-2:N-2-M];
+					flag_a = 0;
+					flag_b = 1;
+					abs = input_2[N-2:N-2-M] - input_1[N-2:N-2-M];
 					// ASsigning overall sign of the output
-					Q0[2][N-1] = Q0[1][N-1];
+					sum[N-1] = input_2[N-1];
 					// Sets ouput to have the same exponent
-					Q0[2][N-2:N-2-M] = Q0[1][N-2:N-2-M];
+					sum[N-2:N-2-M] = input_2[N-2:N-2-M];
 				end
 			else 
 				begin
 					// THe condition that both inputs have the same exponent
-					Q0[4] = 1;
-					Q0[5] = 1;
-					Q0[6] = 0;
+					flag_a = 1;
+					flag_b = 1;
+					abs = 0;
 					// ASsigning overall sign of the output based on size of the mantissa
-					if (Q0[0][N-3-M:0] >= Q0[1][N-3-M:0]) sum[N-1] = Q0[0][N-1];
-					else Q0[2][N-1] = Q0[1][N-1];
-					Q0[2][N-2:N-2-M] = Q0[0][N-2:N-2-M];
+					if (input_1[N-3-M:0] >= input_2[N-3-M:0]) sum[N-1] = input_1[N-1];
+					else sum[N-1] = input_2[N-1];
+					sum[N-2:N-2-M] = input_1[N-2:N-2-M];
 				end
-			Q0[3] = Q0[6];
+			diff = abs;
 		end
 		
-		//Pipeline stage 1
-		pipe pipe_1(.clk(clk), .reset(reset), .D(Q0), .Q(Q1));
-		
 	always_comb
 		begin
 			// Condition for overflow is that it sets the output to the larger input
-			if (Q1[6] > N-M-2) // Because size of mantissa is 10 bits and shifting by 10 would give 0
+			if (abs > 9) // Because size of mantissa is 10 bits and shifting by 10 would give 0
 				begin
-					if (Q1[4] & ~Q1[5]) Q1[2] = Q1[0]; // input 1 is larger and is translated to output
-					else if (~Q1[4] & Q1[5]) Q1[2] = Q1[1]; // input 2 is larger and is translated to output
+					if (flag_a & ~flag_b) sum = input_1; // input 1 is larger and is translated to output
+					else if (~flag_a & flag_b) sum = input_2; // input 2 is larger and is translated to output
 					else // exponents are the same
 						begin
-							if (Q1[0][N-3-M:0] >= Q1[1][N-3-M:0]) Q1[2] = Q1[0];// input 1 has the bigger mantissa
-							else Q1[2] = Q1[1]; // input 2 has the bigger mantissa
+							if (input_1[N-3-M:0] >= input_2[N-3-M:0]) sum = input_1;// input 1 has the bigger mantissa
+							else sum = input_2; // input 2 has the bigger mantissa
 						end
 				end
 			else
 				begin
 					// Shifts the smaller input's mantissa to the right based on abs
-					if (Q1[4] & ~Q1[5])// If input 1 has the larger exponent
+					if (flag_a & ~flag_b)// If input 1 has the larger exponent
 						begin
 							// If the signs of both inputs are the same you add, otherwise you subtract
-							if (Q1[0][N-1] == Q1[1][N-1])
+							if (input_1[N-1] == input_2[N-1])
 								begin
-									Q1[7] = Q1[0][N-3-M:0] + (Q1[1][N-3-M:0] >> Q1[6]-1); // Sum the mantissa
-									Q1[2][N-3-M:0] = Q1[7];
+									res = input_1[N-3-M:0] + (input_2[N-3-M:0] >> abs-1); // Sum the mantissa
+									sum[N-3-M:0] = res;
 								end
 							else
 								begin
-									Q1[7] = Q1[0][N-3-M:0] - (Q1[1][N-3-M:0] >> Q1[6]-1); // Subtract the mantissas
-									Q1[2][N-3-M:0] = Q1[7];
+									res = input_1[N-3-M:0] - (input_2[N-3-M:0] >> abs-1); // Subtract the mantissas
+									sum[N-3-M:0] = res;
 								end
 						end
-					else if (~Q1[4] & Q1[5])
+					else if (~flag_a & flag_b)
 						begin
 							// If the signs of both inputs are the same you add, otherwise you subtract
-							if (Q1[0][N-1] == Q1[1][N-1])
+							if (input_1[N-1] == input_2[N-1])
 								begin
-									Q1[7] = (Q1[0][N-3-M:0] >> Q1[6]-1) + Q1[1][N-3-M:0]; // Sum the mantissa
-									Q1[2][N-3-M:0] = Q1[7];
+									res = (input_1[N-3-M:0] >> abs-1) + input_2[N-3-M:0]; // Sum the mantissa
+									sum[N-3-M:0] = res;
 								end
 							else
 								begin
-									Q1[7] = Q1[1][N-3-M:0] - (Q1[0][N-3-M:0] >> Q1[6]-1); // Subtract the mantissas
-									Q1[2][N-3-M:0] = Q1[7];
+									res = input_2[N-3-M:0] - (input_1[N-3-M:0] >> abs-1); // Subtract the mantissas
+									sum[N-3-M:0] = res;
 								end
 						end
 					else
 						begin 
-							if (Q1[0][N-1] == Q1[1][N-1]) // If exponents and signs equal
+							if (input_1[N-1] == input_2[N-1]) // If exponents and signs equal
 								begin
-									Q1[7] = Q1[0][N-3-M:0] + Q1[1][N-3-M:0]; // Sum the mantissa
-									Q1[2][N-3-M:0] = Q1[7];
+									res = input_1[N-3-M:0] + input_2[N-3-M:0]; // Sum the mantissa
+									sum[N-3-M:0] = res;
 								end
 							else // In this case it will be a subtraction
 								begin
-									if (Q1[0][N-3-M:0] > Q1[1][N-3-M:0]) // Which has the larger mantissa 
+									if (input_1[N-3-M:0] > input_2[N-3-M:0]) // Which has the larger mantissa 
 										begin
-											Q1[7] = Q1[0][N-3-M:0] - Q1[1][N-3-M:0]; // Subtract the mantissa
-											Q1[2][N-3-M:0] = Q1[7];
+											res = input_1[N-3-M:0] - input_2[N-3-M:0]; // Subtract the mantissa
+											sum[N-3-M:0] = res;
 										end
-									else if (Q1[0][N-3-M:0] < Q1[1][N-3-M:0])
+									else if (input_1[N-3-M:0] < input_2[N-3-M:0])
 										begin
-											Q1[7] = Q1[1][N-3-M:0] - Q1[0][N-3-M:0]; // Subtract the mantissa
-											Q1[2][N-3-M:0] = Q1[7];
+											res = input_2[N-3-M:0] - input_1[N-3-M:0]; // Subtract the mantissa
+											sum[N-3-M:0] = res;
 										end
-									else Q1[7] = 0; // Both the exponent and the mantissa are equal so subtraction leads to 0
-									Q1[2][N-3-M:0] = Q1[7];
+									else res = 0; // Both the exponent and the mantissa are equal so subtraction leads to 0
+									sum[N-3-M:0] = res;
 								end
 						end
 				end
 		end
-		
-		//Final pipeline stage 2
-		pipe pipe2(.clk(clk), .reset(reset), .D(Q1), .Q(Q2));
-		assign sum = Q2[2];
-		assign diff = Q2[3];
 endmodule : floating_add
 
 
 
-module floating_product #(parameter N=16, M=4)(input_1, input_2, product, clk, reset);
+module floating_product #(parameter N=16, M=4)(input_1, input_2, product);
 	input logic [N-1:0] input_1, input_2;
-	input logic clk, reset;
 	output logic [N-1:0] product;
 
 	// sign_x = x[N-1]
 	// exponent_x = x[N-2:N-2-M]
 	// mantissa_x = x[N-3-M:0]
 
-//	logic [N-2:N-2-M] sum;
-//	logic [2*(N-3-M):0] mult;
-	logic [2*(N-3-M):0] D0 [4:0];
-	logic [2*(N-3-M):0] Q0 [4:0];
-	logic [2*(N-3-M):0] Q1 [4:0];
-	logic [2*(N-3-M):0] Q2 [4:0];
-	
-	// First pipeline stage 0
-	assign D0[0] = input_1;
-	assign D0[1] = input_2;
-	assign D0[2] = 0; // product
-	assign D0[3] = 0; // sum
-	assign D0[4] = 0; // mult
-	pipe pipe0 #(N = 32, K = 4)(.clk(clk), .reset(reset), .D(D0), .Q(Q0));
+	logic [N-2:N-2-M] sum;
+	logic [2*(N-3-M):0] mult;
 
 	// We have assigned an {M+1} bit exponent so we must have a 2^{M} offset
-	assign Q0[3] = Q0[0][N-2:N-2-M] + Q0[1][N-2:N-2-M];
-	assign Q0[2][N-2:N-2-M] = Q0[3] - (1'b1 << M) + 2;
-	
-	// Second pipeline stage 1
-	pipe pipe1 #(N = 32, K = 4)(.clk(clk), .reset(reset), .D(Q0), .Q(Q1));
+	assign sum = input_1[N-2:N-2-M] + input_2[N-2:N-2-M];
+	assign product[N-2:N-2-M] = sum - (1'b1 << M) + 2;
 
 	always_comb
 		begin
 				// Setting the mantissa of the output
-				Q1[4] = Q1[0][N-3-M:0] * Q1[1][N-3-M:0];
-				if (Q1[4][N-3-M]) Q1[2][N-3-M:0] = Q1[4][2*(N-3-M):2*(N-3-M)-9];
-				else Q1[2][N-3-M:0] = Q1[4][2*(N-3-M):2*(N-3-M)-9] << 1;
-				Q1[2][N-1] = Q1[0][N-1] ^ Q1[1][N-1];
+				mult = input_1[N-3-M:0] * input_2[N-3-M:0];
+				if (mult[N-3-M]) product[N-3-M:0] = mult[2*(N-3-M):2*(N-3-M)-9];
+				else product[N-3-M:0] = mult[2*(N-3-M):2*(N-3-M)-9] << 1;
+				product[N-1] = input_1[N-1] ^ input_2[N-1];
 		end
-		
-		// Final Pipeline Stage 2
-		pipe pipe2 #(N = 32, K = 4)(.clk(clk), .reset(reset), .D(Q1), .Q(Q2));
-		assign product = Q2[2][N-1:0];
 endmodule : floating_product
 
 
 
-module pipe #(parameter N = 16, K = 7)pipe(clk, reset, D, Q);
+module pipe #(parameter N=16)(clk, reset, Q, D);
 	input logic clk, reset;
 	input logic [N-1:0] D [K:0];
 	output reg [N-1:0] Q [K:0];

+ 43 - 41
src/fpu32/adder.v

@@ -2,6 +2,22 @@
 //Copyright (C) Jonathan P Dawson 2013
 //2013-12-12
 
+typedef enum logic [3:0] {
+  add_unpack,
+  add_special,
+  add_align,
+  add_0,
+  add_1,
+  add_norm_0,
+  add_norm_1,
+  add_round,
+  add_pack,
+  add_output,
+  add_input
+
+} adder_state;
+
+
 module adder(
         clk,
         rst,
@@ -31,21 +47,7 @@ module adder(
   reg       [31:0] s_output_z;
   reg       s_input_ack;
 
-  reg       [3:0] state;
-	
-  parameter get_a         = 4'd0,
-            get_b         = 4'd1,
-            unpack        = 4'd2,
-            special_cases = 4'd3,
-            align         = 4'd4,
-            add_0         = 4'd5,
-            add_1         = 4'd6,
-            normalise_1   = 4'd7,
-            normalise_2   = 4'd8,
-            round         = 4'd9,
-            pack          = 4'd10,
-            put_z         = 4'd11,
-            get_input     = 4'd12;
+  adder_state state;
 
   reg       [31:0] a, b, z;
   reg       [26:0] a_m, b_m;
@@ -78,20 +80,20 @@ module adder(
 //        end
 //      end
 
-      get_input:
+      add_input:
       begin
         s_input_ack <= 1;
         if (s_input_ack && input_stb) begin
           a <= input_a;
           b <= input_b;
           s_input_ack <= 0;
-          state <= unpack;
+          state <= add_unpack;
         end
       end
 
 
 
-      unpack:
+      add_unpack:
       begin
         a_m <= {a[22 : 0], 3'd0};
         b_m <= {b[22 : 0], 3'd0};
@@ -99,18 +101,18 @@ module adder(
         b_e <= b[30 : 23] - 127;
         a_s <= a[31];
         b_s <= b[31];
-        state <= special_cases;
+        state <= add_special;
       end
 
-      special_cases:
+      add_special:
       begin
         //if a is NaN return a
         if (a_e == 128 && a_m != 0) begin
           z <= {a_s, 8'hff, a[22], a[21:0]};
-          state <= put_z;
+          state <= add_output;
         end else if (b_e == 128 && b_m != 0) begin
           z <= {b_s, 8'hff, b[22:0]};
-          state <= put_z;
+          state <= add_output;
         //if a is inf return inf
         end else if (a_e == 128 && a_m == 0) begin
           z[31] <= a_s;
@@ -123,31 +125,31 @@ module adder(
               z[22] <= 1;
               z[21:0] <= 0;
           end
-          state <= put_z;
+          state <= add_output;
         //if b is inf return inf
         end else if (b_e == 128 && b_m == 0) begin
           z[31] <= b_s;
           z[30:23] <= 255;
           z[22:0] <= 0;
-          state <= put_z;
+          state <= add_output;
         //if a is zero return b
         end else if ((($signed(a_e) == -127) && (a_m == 0)) && (($signed(b_e) == -127) && (b_m == 0))) begin
           z[31] <= a_s & b_s;
           z[30:23] <= b_e[7:0] + 127;
           z[22:0] <= b_m[26:3];
-          state <= put_z;
+          state <= add_output;
         //if a is zero return b
         end else if (($signed(a_e) == -127) && (a_m == 0)) begin
           z[31] <= b_s;
           z[30:23] <= b_e[7:0] + 127;
           z[22:0] <= b_m[26:3];
-          state <= put_z;
+          state <= add_output;
         //if b is zero return a
         end else if (($signed(b_e) == -127) && (b_m == 0)) begin
           z[31] <= a_s;
           z[30:23] <= a_e[7:0] + 127;
           z[22:0] <= a_m[26:3];
-          state <= put_z;
+          state <= add_output;
         end else begin
           //Denormalised Number
           if ($signed(a_e) == -127) begin
@@ -161,11 +163,11 @@ module adder(
           end else begin
             b_m[26] <= 1;
           end
-          state <= align;
+          state <= add_align;
         end
       end
 
-      align:
+      add_align:
       begin
         if ($signed(a_e) > $signed(b_e)) begin
           b_e <= b_e + 1;
@@ -212,10 +214,10 @@ module adder(
           round_bit <= sum[1];
           sticky <= sum[0];
         end
-        state <= normalise_1;
+        state <= add_norm_0;
       end
 
-      normalise_1:
+      add_norm_0:
       begin
         if (z_m[23] == 0 && $signed(z_e) > -126) begin
           z_e <= z_e - 1;
@@ -224,11 +226,11 @@ module adder(
           guard <= round_bit;
           round_bit <= 0;
         end else begin
-          state <= normalise_2;
+          state <= add_norm_1;
         end
       end
 
-      normalise_2:
+      add_norm_1:
       begin
         if ($signed(z_e) < -126) begin
           z_e <= z_e + 1;
@@ -237,11 +239,11 @@ module adder(
           round_bit <= guard;
           sticky <= sticky | round_bit;
         end else begin
-          state <= round;
+          state <= add_round;
         end
       end
 
-      round:
+      add_round:
       begin
         if (guard && (round_bit | sticky | z_m[0])) begin
           z_m <= z_m + 1;
@@ -249,10 +251,10 @@ module adder(
             z_e <=z_e + 1;
           end
         end
-        state <= pack;
+        state <= add_pack;
       end
 
-      pack:
+      add_pack:
       begin
         z[22 : 0] <= z_m[22:0];
         z[30 : 23] <= z_e[7:0] + 127;
@@ -269,23 +271,23 @@ module adder(
           z[30 : 23] <= 255;
           z[31] <= z_s;
         end
-        state <= put_z;
+        state <= add_output;
       end
 
-      put_z:
+      add_output:
       begin
         s_output_z_stb <= 1;
         s_output_z <= z;
         if (s_output_z_stb && output_z_ack) begin
           s_output_z_stb <= 0;
-          state <= get_input;
+          state <= add_input;
         end
       end
 
     endcase
 
     if (rst == 1) begin
-      state <= get_input;
+      state <= add_input;
       s_input_ack <= 0;
       s_output_z_stb <= 0;
     end

+ 1 - 1
src/fpu32/fpu32.sv

@@ -1,4 +1,4 @@
-`include "adder.v"
+`include "adder.sv"
 `include "mult.v"
 
 // synopsys translate_off

+ 48 - 45
src/fpu32/mult.v

@@ -2,6 +2,23 @@
 //Copyright (C) Jonathan P Dawson 2013
 //2013-12-12
 
+typedef enum logic [3:0] {
+  mul_unpack,
+  mul_special,
+  mul_norm_a,
+  mul_norm_b,
+  mul_0,
+  mul_1,
+  mul_norm_1,
+  mul_norm_2,
+  mul_round,
+  mul_pack,
+  mul_output,
+  mul_input
+
+} mult_state;
+
+
 module multiplier(
         input_a,
         input_b,
@@ -31,21 +48,7 @@ module multiplier(
   reg       [31:0] s_output_z;
   reg       s_input_ack;
 
-  reg       [3:0] state;
-  parameter get_a         = 4'd0,
-            get_b         = 4'd1,
-            unpack        = 4'd2,
-            special_cases = 4'd3,
-            normalise_a   = 4'd4,
-            normalise_b   = 4'd5,
-            multiply_0    = 4'd6,
-            multiply_1    = 4'd7,
-            normalise_1   = 4'd8,
-            normalise_2   = 4'd9,
-            round         = 4'd10,
-            pack          = 4'd11,
-            put_z         = 4'd12,
-            get_input     = 4'd13;
+  mult_state state;
 
   reg       [31:0] a, b, z;
   reg       [23:0] a_m, b_m, z_m;
@@ -79,18 +82,18 @@ module multiplier(
 //        end
 //      end
 
-      get_input:
+      mul_input:
       begin
         s_input_ack <= 1;
         if (s_input_ack && input_stb) begin
           a <= input_a;
           b <= input_b;
           s_input_ack <= 0;
-          state <= unpack;
+          state <= mul_unpack;
         end
       end
 
-      unpack:
+      mul_unpack:
       begin
         a_m <= a[22 : 0];
         b_m <= b[22 : 0];
@@ -98,10 +101,10 @@ module multiplier(
         b_e <= b[30 : 23] - 127;
         a_s <= a[31];
         b_s <= b[31];
-        state <= special_cases;
+        state <= mul_special;
       end
 
-      special_cases:
+      mul_special:
       begin
         //if a is NaN or b is NaN return NaN
         if ((a_e == 128 && a_m != 0) || (b_e == 128 && b_m != 0)) begin
@@ -109,7 +112,7 @@ module multiplier(
           z[30:23] <= 255;
           z[22] <= 1;
           z[21:0] <= 0;
-          state <= put_z;
+          state <= mul_output;
         //if a is inf return inf
         end else if (a_e == 128) begin
           z[31] <= a_s ^ b_s;
@@ -122,7 +125,7 @@ module multiplier(
             z[22] <= 1;
             z[21:0] <= 0;
           end
-          state <= put_z;
+          state <= mul_output;
         //if b is inf return inf
         end else if (b_e == 128) begin
           z[31] <= a_s ^ b_s;
@@ -135,19 +138,19 @@ module multiplier(
             z[22] <= 1;
             z[21:0] <= 0;
           end
-          state <= put_z;
+          state <= mul_output;
         //if a is zero return zero
         end else if (($signed(a_e) == -127) && (a_m == 0)) begin
           z[31] <= a_s ^ b_s;
           z[30:23] <= 0;
           z[22:0] <= 0;
-          state <= put_z;
+          state <= mul_output;
         //if b is zero return zero
         end else if (($signed(b_e) == -127) && (b_m == 0)) begin
           z[31] <= a_s ^ b_s;
           z[30:23] <= 0;
           z[22:0] <= 0;
-          state <= put_z;
+          state <= mul_output;
         end else begin
           //Denormalised Number
           if ($signed(a_e) == -127) begin
@@ -161,48 +164,48 @@ module multiplier(
           end else begin
             b_m[23] <= 1;
           end
-          state <= normalise_a;
+          state <= mul_norm_a;
         end
       end
 
-      normalise_a:
+      mul_norm_a:
       begin
         if (a_m[23]) begin
-          state <= normalise_b;
+          state <= mul_norm_b;
         end else begin
           a_m <= a_m << 1;
           a_e <= a_e - 1;
         end
       end
 
-      normalise_b:
+      mul_norm_b:
       begin
         if (b_m[23]) begin
-          state <= multiply_0;
+          state <= mul_0;
         end else begin
           b_m <= b_m << 1;
           b_e <= b_e - 1;
         end
       end
 
-      multiply_0:
+      mul_0:
       begin
         z_s <= a_s ^ b_s;
         z_e <= a_e + b_e + 1;
         product <= a_m * b_m * 4;
-        state <= multiply_1;
+        state <= mul_1;
       end
 
-      multiply_1:
+      mul_1:
       begin
         z_m <= product[49:26];
         guard <= product[25];
         round_bit <= product[24];
         sticky <= (product[23:0] != 0);
-        state <= normalise_1;
+        state <= mul_norm_1;
       end
 
-      normalise_1:
+      mul_norm_1:
       begin
         if (z_m[23] == 0) begin
           z_e <= z_e - 1;
@@ -211,11 +214,11 @@ module multiplier(
           guard <= round_bit;
           round_bit <= 0;
         end else begin
-          state <= normalise_2;
+          state <= mul_norm_2;
         end
       end
 
-      normalise_2:
+      mul_norm_2:
       begin
         if ($signed(z_e) < -126) begin
           z_e <= z_e + 1;
@@ -224,11 +227,11 @@ module multiplier(
           round_bit <= guard;
           sticky <= sticky | round_bit;
         end else begin
-          state <= round;
+          state <= mul_round;
         end
       end
 
-      round:
+      mul_round:
       begin
         if (guard && (round_bit | sticky | z_m[0])) begin
           z_m <= z_m + 1;
@@ -236,10 +239,10 @@ module multiplier(
             z_e <=z_e + 1;
           end
         end
-        state <= pack;
+        state <= mul_pack;
       end
 
-      pack:
+      mul_pack:
       begin
         z[22 : 0] <= z_m[22:0];
         z[30 : 23] <= z_e[7:0] + 127;
@@ -253,23 +256,23 @@ module multiplier(
           z[30 : 23] <= 255;
           z[31] <= z_s;
         end
-        state <= put_z;
+        state <= mul_output;
       end
 
-      put_z:
+      mul_output:
       begin
         s_output_z_stb <= 1;
         s_output_z <= z;
         if (s_output_z_stb && output_z_ack) begin
           s_output_z_stb <= 0;
-          state <= get_input;
+          state <= mul_input;
         end
       end
 
     endcase
 
     if (rst == 1) begin
-      state <= get_input;
+      state <= mul_input;
       s_input_ack <= 0;
       s_output_z_stb <= 0;
     end

+ 81 - 123
src/neural/comp.sv

@@ -1,90 +1,41 @@
-`include "../blocks/abus.sv"
-`include "../fpu32/fpu32.sv"
-
 /*
-          ____
-   x0 -->|ADD0|--> y0
-   x1 -->|    |
- bus0 <->|    |<-- ack0
- bus1 <->|____|--> stb0 
-          ____ 
-   x2 -->|ADD1|--> y1
-   x3 -->|    |
- bus2 <->|    |<-- ack1
- bus3 <->|____|--> stb1
+              _____
+     x[0] ==>|  A  |
+ x_stb[0] -->|  D  |
+ x_ack[0] <--|  D  |==> y
+             |  E  |--> y_stb
+     x[1] ==>|  R  |<-- y_ack
+ x_stb[1] -->|     |
+ x_ack[1] <--|_____|
 
 */
-
-module adder4to2#(parameter N=32)(x, clk, rst, y, left, right);
+module cadder#(parameter N=32)(clk, rst, x, x_ack, x_stb, y, y_ack, y_stb);
     input logic clk;
     input logic rst;
-    input wire [N-1:0] x [3:0];
-    output logic [N-1:0] y [1:0];
-    abus_io left[3:0];
-    abus_io right[1:0];
+    input wire [N-1:0] x [1:0];
+    output logic [N-1:0] y;
+    output x_ack[1:0];
+    input x_stb[1:0];
+    input y_ack;
+    output y_stb;
 
-    wire out_stb [1:0];
-    assign right.stb = out_stb[0] & out_stb[1];
+    wire left_ack, left_stb;
+    assign x_ack[0] = left_ack;
+    assign x_ack[1] = left_ack;
+    assign left_stb = x_stb[0] & x_stb[1];
 
     adder add0 (
         .clk(clk),
         .rst(rst),
         .input_a(x[0]),
         .input_b(x[1]),
-        .input_stb(left0.stb),
-        .input_ack(left0.ack),
-        .output_z(y[0]),
-        .output_z_ack(right.ack),
-        .output_z_stb(out_stb[0])
+        .input_stb(left_stb),
+        .input_ack(left_ack),
+        .output_z(y),
+        .output_z_ack(y_ack),
+        .output_z_stb(y_stb)
     );
-
-    adder add1 (
-        .clk(clk),
-        .rst(rst),
-        .input_a(x[2]),
-        .input_b(x[3]),
-        .input_stb(left1.stb),
-        .input_ack(left1.ack),
-        .output_z(y[1]),
-        .output_z_ack(right.ack),
-        .output_z_stb(out_stb[1])
-    );
-
-endmodule : adder4to2
-
-
-module adder4to2_tb();
-    logic clk, rst;
-    
-    logic [31:0] x [3:0];
-    logic [31:0] y [1:0];
-    abus_io inputBus();
-    abus_io outputBus();
-    
-    adder4to2 adder_casc(.clk(clk), .rst(rst), .x(x), .y(y), .left(inputBus.right), .right(outputBus.left));    
-    initial forever #5 clk = ~clk;
-    initial begin
-        $display("Testing adder4to2");
-        clk = 0;
-        rst = 1;
-        inputBus.stb = 0;
-        outputBus.ack = 0;
-        #20
-        rst = 0;
-        x = {'h41388000, 'h407c0000, 'h42480000, 'h42460000};
-        inputBus.stb = 1;
-        wait(inputBus.ack == 1);
-        #15 inputBus.stb = 0;
-        
-        wait(outputBus.stb == 1);
-        outputBus.ack = 1;
-        assert(y[0] == 'h42c70000);
-        assert(y[1] == 'h41778000);
-        wait(outputBus.stb == 0);
-        outputBus.ack = 0;
-    end
-    
-endmodule : adder4to2_tb
+endmodule : cadder
 
 /*
   K layers of cascade adder
@@ -108,11 +59,6 @@ IN | K3 |  K2  |  K1  | OUT
 [inputs]
 x size: 2**K
 left io size: 2**K
-
-[internal]
-layer connecting wires: 2**K - 2
-number of io buses: 2**(K-1) - 1
-adder4to2 modules: 2**(K-2)
 */
 
 module adder_casc#(parameter K,N=32)(clk, rst, x, y, left, right);
@@ -123,51 +69,60 @@ module adder_casc#(parameter K,N=32)(clk, rst, x, y, left, right);
     
     abus_io right;
     abus_io left[2**K-1:0];
-    
+
     wire [N-1:0] layer_w [2**K-3:0];
-    abus_io bus_w[2**(K-1)-2:0]();
-    
+    wire ack_w [2**K-3:0];
+    wire stb_w [2**K-3:0];
+
     genvar i,j;
     generate
-        for(i=0; i<K; i++) begin : generate_layers    
+        for(i=0; i<K; i++) begin : generate_layers
             // First layers
             if(i == 0) begin
-                for(j=0; j<2**(K-2); j++) begin : generate_casc0
-                    adder4to2 a(
+                for(j=0; j<2**(K-1); j++) begin : generate_casc0
+                    cadder a(
                       .clk(clk),
                       .rst(rst),
-                      .x(x[j*4+:4]),
+                      .x(x[j*2+:2]),
                       .y(layer_w[j]),
-                      .left0(left[j*2].right),
-                      .left1(left[j*2+1].right),
-                      .right(bus_w[j].left)
+                      .x_ack({left[j*2].ack, left[j*2+1].ack}),
+                      .x_stb({left[j*2].stb, left[j*2+1].stb}),
+                      .y_ack(ack_w[j]),
+                      .y_stb(stb_w[j])
                     );
                 end
             end
             // Last layer
             else if((K-i) <= 1) begin
-                adder c(
+                localparam s0 = 2**K-4;
+                localparam s1 = 2**K-3;
+                cadder c(
                     .clk(clk),
                     .rst(rst),
-                    .input_a(layer_w[i-1][0]),
-                    .input_b(layer_w[i-1][1]),
-                    .input_stb(bus_w[i-1].stb),
-                    .input_ack(bus_w[i-1].ack),
-                    .output_z(y),
-                    .output_z_ack(right.ack),
-                    .output_z_stb(right.stb)
-                    );
+                    .x(layer_w[s0+:2]),
+                    .y(y),
+                    .x_ack({ack_w[s0], ack_w[s1]}),
+                    .x_stb({stb_w[s0], stb_w[s1]}),
+                    .y_ack(right.ack),
+                    .y_stb(right.stb)
+                );
             end
             // Middle layers
             else begin
-                for(j=0; j<2**(K-i-2); j++) begin : generate_casc1
-                    adder4to2 b(
-                      .clk(clk),
-                      .rst(rst),
-                      .x(layer_w[i-1][j*4+:4]),
-                      .y(layer_w[i][j*2+:2]),
-                      .left(bus_w[i-1][j].right),
-                      .right(bus_w[i][j].left)
+                for(j=0; j<2**(K-i-1); j++) begin : generate_casc1
+                    localparam s = $floor((2.0**(K-1.0) * (2.0**(i-1)-1.0)/2.0**(i-1))+j);
+                    localparam ix = s*2;
+                    localparam iy = s+2**(K-1);
+
+                    cadder b(
+                        .clk(clk),
+                        .rst(rst),
+                        .x(layer_w[ix+:2]),
+                        .y(layer_w[iy]),
+                        .x_ack(ack_w[ix+:2]),
+                        .x_stb(stb_w[ix+:2]),
+                        .y_ack(ack_w[iy]),
+                        .y_stb(stb_w[iy])
                     );
                 end
             end
@@ -179,24 +134,25 @@ endmodule : adder_casc
 module adder_casc_tb();
+    // Testbench: feeds adder_casc with 2**K single-precision powers of two and checks the summed output y.
     logic clk, rst;
     
-    localparam K=3;
-    logic [31:0] x [7:0];
+    localparam K=4;
+    logic [31:0] x [2**K-1:0];
     logic [31:0] y;
+    logic ack [2**K-1:0];  // per-input acknowledge, read back from input_ios[k].ack
+    logic stb [2**K-1:0];  // per-input strobe, driven onto input_ios[k].stb
+
     abus_io input_ios[2**K-1:0]();
     abus_io output_io();
     
-    virtual abus_io input_vios[2**K-1:0];
     genvar k;
     generate
-        for(k=0; k<2**K; k++) begin : map_generator
-            initial begin : map_physical2virtual
-                input_vios[k] = input_ios[k];
-            end : map_physical2virtual
+        for(k=0; k<2**K; k++) begin : io_mapper
+            assign input_ios[k].stb = stb[k];
+            assign ack[k] = input_ios[k].ack;
         end
     endgenerate
     
     
-    adder_casc#(.K(K)) adder_casc0(.clk(clk), .rst(rst), .x(x), .y(y), .left(input_ios), .right(output_io.left));    
+    adder_casc#(.K(K)) adder_casc0(.clk(clk), .rst(rst), .x(x), .y(y), .left(input_ios), .right(output_io.left));
     initial forever #5 clk = ~clk;
     initial begin
         
@@ -204,26 +160,28 @@ module adder_casc_tb();
         clk = 0;
         rst = 1;
         
-        foreach(input_vios[i]) input_vios[i].stb = 0;
+        foreach(stb[i]) stb[i] = 0;
         output_io.ack = 0;
         #20
         rst = 0;
-        x = {'h43800000, 'h43000000, 'h42800000, 'h42000000, 'h41800000, 'h41000000, 'h40800000, 'h40000000};
+        // Initialise x[i] with the single-precision encoding of 2**(i+1): i=0 -> 'h40000000 (2.0)
+        foreach(x[i]) x[i] = ('h400 + (i*8)) << 20;
+        foreach(stb[i]) stb[i] = 1;
+        // NOTE(review): in the inner fork, wait(ack[i] == 1) and "#20 stb[i] = 0" are separate parallel statements, so the strobe drops 20 time units after the fork starts, not 20 after the ack -- confirm intended.
         fork
-            foreach(input_vios[i]) begin
+            foreach(ack[i]) begin
                 fork
-                    input_vios[i].stb = 1;
-                    wait(input_vios[i].ack == 1);
-                    #10
-                    input_vios[i].stb = 0;
+                    wait(ack[i] == 1);
+                    #20
+                    stb[i] = 0;
                 join
             end
         join
-        #20 
+        #20
         
         wait(output_io.stb == 1);
         output_io.ack = 1;
-        assert(y[0] == 'h43ff0000);
+        // For K=4: 2 + 4 + ... + 65536 = 131070 = 'h47ffff00. Compare the whole word; y[0] would select bit 0 only.
+        assert(y == 'h47ffff00);
         wait(output_io.stb == 0);
         output_io.ack = 0;
     end

+ 127 - 15
src/neural/neuron.sv

@@ -2,30 +2,142 @@
 `timescale 1 ps / 1 ps
 // synopsys translate_on
 
+/*
+             ______     _________
+     w[i] =>| MULT |==>|         |
+     x[i] =>|______|   |         |
+             ______    |         |
+   w[i+1] =>| MULT |==>| CASCADE |      _____
+   x[i+1] =>|______|   |         | b =>| ADD |
+                .      |  ADDER  |====>|_____|==> y
+                .      |         |
+             ______    |         |
+   w[M-1] =>| MULT |==>|         |
+   x[M-1] =>|______|   |_________|
 
-module neuron#(parameter M, N=32)(x, y, w, b, stb, ack, clk, rst);
+*/
+
+module neuron#(parameter K, N=32)(x, y, w, b, left, right, clk, rst); // K: log2 of the input count; N: word width of x, w, b and y
+    localparam M = 2**K; // number of x/w input pairs and of multipliers generated below
     input wire [N-1:0] x [M-1:0];
-    input wire [N-1:0] w;
-    input wire [N-1:0] b [M-1:0];
-    output logic stb;
-    input logic ack;
-    input logic clk;
-    input logic rst;
+    input wire [N-1:0] w [M-1:0]; // w[i] is paired with x[i] in multiplier i
+    input wire [N-1:0] b; // fed to input_a of the final adder (adder1)
     output logic [N-1:0] y;
 
-    multiplier mult_array[M-1:0](
+    input wire clk;
+    input wire rst;
+    // stb/ack buses: left[i] feeds multiplier i; inner_io0 and inner_io1 are internal; right is the output side
+    abus_io left[M-1:0]; // port: one stb/ack pair per multiplier input
+    abus_io inner_io0[M-1:0](); // multiplier outputs -> adder_casc inputs
+    abus_io inner_io1(); // adder_casc output -> final adder input
+    abus_io right; // port: stb/ack accompanying the result y
+
+    wire [N-1:0] inner_w [M-1:0]; // output_z of each multiplier
+    wire [N-1:0] casc_w; // output y of adder_casc
+    // One multiplier per input pair: inner_w[i] is output_z for input_a = x[i], input_b = w[i]
+    genvar i;
+    generate
+        for(i=0;i<M;i++) begin: gen_mult_layer
+            multiplier mult(
+                .clk(clk),
+                .rst(rst),
+                .input_a(x[i]),
+                .input_b(w[i]),
+                .input_stb(left[i].stb),
+                .input_ack(left[i].ack),
+                .output_z(inner_w[i]),
+                .output_z_ack(inner_io0[i].ack),
+                .output_z_stb(inner_io0[i].stb)
+            );
+        end
+    endgenerate
+    // adder_casc combines the M values of inner_w into casc_w (the "CASCADE ADDER" of the header diagram)
+    adder_casc#(.K(K), .N(N)) adder0(
         .clk(clk),
         .rst(rst),
-        .input_a(input_a),
-        .input_b(input_b),
-        .input_stb(mult_input_stb),
-        .input_ack(mult_input_ack),
-        .output_z(result_mult),
-        .output_z_ack(mult_output_z_ack),
-        .output_z_stb(mult_output_z_stb),
+        .x(inner_w),
+        .y(casc_w),
+        .left(inner_io0),
+        .right(inner_io1)
+    );
+    // Final stage: adder1 takes b and casc_w and drives the module output y
+    adder adder1 (
+        .clk(clk),
+        .rst(rst),
+        .input_a(b),
+        .input_b(casc_w),
+        .input_stb(inner_io1.stb),
+        .input_ack(inner_io1.ack),
+        .output_z(y),
+        .output_z_ack(right.ack),
+        .output_z_stb(right.stb)
     );
 
 endmodule : neuron
 
 
+module neuron_tb;
+    // Testbench for neuron: applies one set of M input/weight pairs plus a
+    // bias, then completes the output handshake on `right`. The result y is
+    // not checked automatically; inspect it in the waveform.
+    localparam K = 3;       // forwarded to neuron#(.K(K))
+    localparam M = 2**K;    // number of inputs, same derivation as inside neuron
+
+    logic clk, rst;
+
+    logic [31:0] x [M-1:0];
+    logic [31:0] w [M-1:0];
+    logic [31:0] b;
+    logic [31:0] y;
+
+    // Plain per-input handshake signals, bridged onto left[] by io_mapper.
+    logic ack [M-1:0];
+    logic stb [M-1:0];
+
+    abus_io left[M-1:0]();
+    abus_io right();
+
+    neuron#(.K(K)) neu0(
+        .clk(clk),
+        .rst(rst),
+        .x(x),
+        .y(y),
+        .w(w),
+        .b(b),
+        .left(left),
+        .right(right)
+    );
+
+    genvar k;
+    generate
+        for(k=0; k<M; k++) begin : io_mapper
+            assign left[k].stb = stb[k];
+            assign ack[k] = left[k].ack;
+        end
+    endgenerate
+
+    // Free-running clock with a period of 10 time units.
+    initial forever #5 clk = ~clk;
+    initial begin
+        clk = 0;
+        rst = 1;
+        foreach(stb[i]) stb[i] = 0;
+        right.ack = 0;
+        b = 'h3f000000;                 // 0.5 in IEEE-754 single precision
+        w = '{default: 'h3fa00000};     // every weight is 1.25
+        // Input vector, leftmost entry is x[M-1]; the list must hold exactly M words.
+        x = {
+            // Alternative stimulus (all 1.25):
+            // 'h3fa00000, 'h3fa00000, 'h3fa00000, 'h3fa00000,
+            // 'h3fa00000, 'h3fa00000, 'h3fa00000, 'h3fa00000
+            'h417a0000, 'h40fa0000, 'h41fa0000, 'h427a0000,
+            'h407a0000, 'h40780000, 'h40440000, 'h40cc0000
+        };
+        #10;
+        rst = 0;
+        foreach(stb[i]) stb[i] = 1;
+        // NOTE(review): stb is released after a fixed 20 time units without
+        // observing ack[] -- confirm the multipliers have latched by then.
+        #20;
+        foreach(stb[i]) stb[i] = 0;
+        // Output handshake: acknowledge the result, then wait for stb to drop.
+        wait(right.stb == 1);
+        right.ack = 1;
+        #10
+        wait(right.stb == 0);
+        right.ack = 0;
+    end
+
+endmodule : neuron_tb