4 gadi atpakaļ · a64c5d0f1e
--- a/src/fpu16/fpu16.sv
+++ b/src/fpu16/fpu16.sv
@@ -62,6 +62,11 @@ module fpu16_tb;
 
				 		expected_mult = 0;
			
 
				 
			
 
				 		// test some common values
			
 
				+		input_a = 'hbec64dc6;
			
 
				+		input_b = 'h3ecc3194;
			
 
				+		# 30;
			
 
				+		expected_add = 'h3c3c79c0;
			
 
				+
			
 
				 		input_a = 'h41ac3000;
			
 
				 		input_b = 'h431f9000;
			
 
				 		# 30;
			
--- a/src/fpu32/fpu32.sv
+++ b/src/fpu32/fpu32.sv
@@ -52,6 +52,7 @@ module fpu32_tb();
 
				         static int num_err = 0;
			
 
				         static int num_tests = $size(test_mem) * 2;
			
 
				 
			
 
				+
			
 
				         clk = 0;
			
 
				         reset = 1;
			
 
				         adder_input_stb = 0;
			
@@ -63,6 +64,18 @@ module fpu32_tb();
 
				         #20;
			
 
				         reset = 0;
			
 
				 
			
 
				+        input_a = 'hbec64dc6;
			
 
				+        input_b = 'h3ecc3194;
			
 
				+
			
 
				+        adder_input_stb = 1;
			
 
				+        wait(adder_input_ack == 1);
			
 
				+        #15;
			
 
				+        adder_input_stb = 0;
			
 
				+        mult_input_stb = 0;
			
 
				+        # 100;
			
 
				+        // expected_add = 'h3c3c79c0;
			
 
				+
			
 
				+
			
 
				 
			
 
				         for (int i=0; i < $size(test_mem); i++) begin
			
 
				             input_a = test_mem[i][0];
			
--- a/src/fpu32p/add32.sv
+++ b/src/fpu32p/add32.sv
@@ -83,17 +83,19 @@ module adder_32(
 
				                         begin
			
 
				                             mantissaa0 <= a[22:0];
			
 
				                             mantissab0 <= b[22:0];
			
 
				+                            outsign0 <= a[31];
			
 
				                         end
			
 
				                     else
			
 
				                         begin
			
 
				                             mantissaa0 <= b[22:0];
			
 
				                             mantissab0 <= a[22:0];
			
 
				+                            outsign0 <= b[31];
			
 
				                         end
			
 
				                     outexponent0 <= a[30:23];
			
 
				-                    outsign0 <= a[31];
			
 
				                 end
			
 
				         end
			
 
				 
			
 
				+    // Stage 1
			
 
				     always_ff @(posedge clk)
			
 
				         begin
			
 
				             mantissab1 <= mantissab0;
			
@@ -106,6 +108,7 @@ module adder_32(
 
				             count1 <= trailingzerodetector(mantissab0);
			
 
				         end
			
 
				 
			
 
				+    // Stage 2
			
 
				     always_ff @(posedge clk)
			
 
				         begin
			
 
				             stickybit2 <= (d1 > count1) ? 1 : 0;
			
@@ -117,9 +120,9 @@ module adder_32(
 
				             eop2 <= eop1;
			
 
				         end
			
 
				 
			
 
				+    // Stage 3
			
 
				     assign mantissabnew2 = (eop2) ? ~(mantissabshift2 + stickybit2) : mantissabshift2 + stickybit2;
			
 
				     assign {carrya2, outmantissaa2} = mantissaanew2 + mantissabnew2 + eop2;
			
 
				-    assign {carrya2, outmantissaa2} = mantissaanew2 + mantissabnew2 + eop2;
			
 
				 
			
 
				     wire cond2;
			
 
				     reg ext3, cond3;
			
@@ -139,6 +142,7 @@ module adder_32(
 
				                 shift3 <= equal2 | (carrya2 & (~eop2));
			
 
				         end
			
 
				 
			
 
				+    // Stage 4
			
 
				     always_ff @(posedge clk)
			
 
				         begin
			
 
				             outsign4 <= outsign3;
			
--- a/src/fpu32p/add32a.sv
+++ b/src/fpu32p/add32a.sv
@@ -0,0 +1,92 @@
 
				+
			
 
				+// Pipelined IEEE-754 single-precision adder computing y = a + b.
				+//
				+// Three register stages (align -> add/subtract -> normalise). Rounding is
				+// done combinationally after the third stage, so `y` corresponds to the
				+// operands sampled three rising clock edges earlier.
				+//
				+//   clk  : clock for all pipeline registers
				+//   rst  : present in the port list but not used by the datapath
				+//   a, b : operands (sign[31], exponent[30:23], fraction[22:0])
				+//   y    : rounded sum (round to nearest, ties to even)
				+//
				+// Infinity/NaN operands and exponent overflow are not special-cased.
				+module adder_32a (clk, rst, a, b, y);
				+    input clk, rst;
				+    input [31:0] a, b;
				+    output [31:0] y;
				+
				+    // ---------------- Stage 1: alignment ----------------
				+    // Order the operands by magnitude so the later subtraction cannot go negative.
				+    wire exchange = (b[30:0] > a[30:0]);
				+    wire [31:0] fp_large = exchange ? b : a;
				+    wire [31:0] fp_small = exchange ? a : b;
				+
				+    // The hidden bit is 1 for a non-zero exponent and 0 for a denormal.
				+    wire [23:0] large_frac24 = {|fp_large[30:23], fp_large[22:0]};
				+    wire [23:0] small_frac24 = {|fp_small[30:23], fp_small[22:0]};
				+
				+    wire sign = exchange ? b[31] : a[31];      // sign of the larger magnitude
				+    wire op_sub = fp_large[31] ^ fp_small[31]; // differing signs => subtract magnitudes
				+
				+    wire [7:0] exp_diff = fp_large[30:23] - fp_small[30:23];
				+    // A denormal behaves as exponent 1 rather than 0, so shift one place less.
				+    wire small_den_only = (fp_large[30:23] != 0) & (fp_small[30:23] == 0);
				+    wire [7:0] shift_amount = small_den_only ? exp_diff - 8'h1 : exp_diff;
				+    // 50-bit window: the top 26 bits are kept, the low 24 collapse into a sticky bit.
				+    wire [49:0] small_frac50 = (shift_amount >= 26) ?
				+        {26'h0, small_frac24} :
				+        ({small_frac24, 26'h0} >> shift_amount);
				+    wire [26:0] small_frac27 = {small_frac50[49:24], |small_frac50[23:0]};
				+
				+    reg sign1, op_sub1;
				+    reg [7:0] exp1;
				+    reg [23:0] large_frac1;
				+    reg [26:0] small_frac1;
				+
				+    always_ff @(posedge clk) begin
				+        sign1 <= sign;
				+        exp1 <= fp_large[30:23];
				+        op_sub1 <= op_sub;
				+        large_frac1 <= large_frac24;
				+        small_frac1 <= small_frac27;
				+    end
				+
				+    // ---------------- Stage 2: calculation ----------------
				+    // Both operands are widened to 28 bits: one carry bit on top, three
				+    // guard/round/sticky bits at the bottom.
				+    wire [27:0] aligned_large_frac = {1'b0, large_frac1, 3'b000};
				+    wire [27:0] aligned_small_frac = {1'b0, small_frac1};
				+    wire [27:0] cal_frac = op_sub1 ?
				+        aligned_large_frac - aligned_small_frac :
				+        aligned_large_frac + aligned_small_frac;
				+
				+    reg sign2;
				+    reg [7:0] exp2;
				+    reg [27:0] frac2;
				+
				+    always_ff @(posedge clk) begin
				+        sign2 <= sign1;
				+        exp2 <= exp1;
				+        frac2 <= cal_frac;
				+    end
				+
				+    // ---------------- Stage 3: normalisation ----------------
				+    // Count leading zeros of frac2[26:0] in 16/8/4/2/1 steps while shifting
				+    // the fraction left by the same amount; f0 is the fully shifted value.
				+    wire [26:0] f4, f3, f2, f1, f0;
				+    wire [4:0] zeros;
				+    assign zeros[4] = ~|frac2[26:11]; // 16-bit 0
				+    assign f4 = zeros[4] ? {frac2[10:0], 16'b0} : frac2[26:0];
				+    assign zeros[3] = ~|f4[26:19]; // 8-bit 0
				+    assign f3 = zeros[3] ? {f4[18:0], 8'b0} : f4;
				+    assign zeros[2] = ~|f3[26:23]; // 4-bit 0
				+    assign f2 = zeros[2] ? {f3[22:0], 4'b0} : f3;
				+    assign zeros[1] = ~|f2[26:25]; // 2-bit 0
				+    assign f1 = zeros[1] ? {f2[24:0], 2'b0} : f2;
				+    assign zeros[0] = ~f1[26]; // 1-bit 0
				+    assign f0 = zeros[0] ? {f1[25:0], 1'b0} : f1;
				+
				+    reg [26:0] frac3;
				+    reg [7:0] exp3;
				+    reg sign3;
				+
				+    always_ff @(posedge clk) begin
				+        sign3 <= sign2;
				+        if (frac2[27]) begin
				+            // Carry out of the addition: shift right once and bump the exponent.
				+            frac3 <= frac2[27:1]; // 1x.xxxxxxxxxxxxxxxxxxxxxxx xxx
				+            exp3 <= exp2 + 8'h1;  // 1.xxxxxxxxxxxxxxxxxxxxxxx xxx
				+        end else begin
				+            if ((exp2 > zeros) && (f0[26])) begin // a normalized number
				+                exp3 <= exp2 - zeros;
				+                frac3 <= f0; // 01.xxxxxxxxxxxxxxxxxxxxxxx xxx
				+            end else begin // is a denormalized number or 0
				+                exp3 <= 0;
				+                if (exp2 != 0) // (e - 127) = ((e - 1) - 126)
				+                    frac3 <= frac2[26:0] << (exp2 - 8'h1);
				+                else frac3 <= frac2[26:0];
				+            end
				+        end
				+    end
				+
				+    // ---------------- Rounding (combinational) ----------------
				+    // frac3[2:0] are guard/round/sticky. Increment when the discarded part is
				+    // above one half, or exactly one half with an odd kept LSB (frac3[3]).
				+    wire frac_plus_1 = // for rounding
				+        frac3[2] & (frac3[1] | frac3[0]) | frac3[2] & ~frac3[1] & ~frac3[0] & frac3[3];
				+
				+    wire [24:0] frac_round = {1'b0, frac3[26:3]} + frac_plus_1;
				+    // A carry out of the rounding increment moves the result up one exponent.
				+    wire [7:0] exponent = frac_round[24] ? exp3 + 8'h1 : exp3;
				+    assign y = {sign3, exponent, frac_round[22:0]};
				+
				+endmodule : adder_32a
			
--- a/src/fpu32p/add32c.sv
+++ b/src/fpu32p/add32c.sv
@@ -0,0 +1,268 @@
 
				+
			
 
				+module adder_32c (a,b,sub,rm,s,clk,clrn,e); // pipelined fp adder
				+    // Top level of the three-stage adder: align -> calculate -> normalise,
				+    // with one register bank between each pair of stages.
				+    input         clk, clrn; // clock and active-low reset for the stage registers
				+    input         e;         // register enable
				+    input         sub;       // 1: a - b, 0: a + b
				+    input  [1:0]  rm;        // rounding mode, carried down the pipeline
				+    input  [31:0] a, b;      // operands
				+    output [31:0] s;         // result
				+
				+    // Signals leaving the alignment stage (prefix a_).
				+    wire        a_sign, a_op_sub;
				+    wire        a_is_nan, a_is_inf;
				+    wire [7:0]  a_exp;
				+    wire [22:0] a_inf_nan_frac;
				+    wire [23:0] a_large_frac;
				+    wire [26:0] a_small_frac;
				+
				+    // The same signals after the first register bank (prefix c_).
				+    wire        c_sign, c_op_sub;
				+    wire        c_is_nan, c_is_inf;
				+    wire [1:0]  c_rm;
				+    wire [7:0]  c_exp;
				+    wire [22:0] c_inf_nan_frac;
				+    wire [23:0] c_large_frac;
				+    wire [26:0] c_small_frac;
				+    wire [27:0] c_frac;      // output of the calculation stage
				+
				+    // Signals after the second register bank (prefix n_).
				+    wire        n_sign;
				+    wire        n_is_nan, n_is_inf;
				+    wire [1:0]  n_rm;
				+    wire [7:0]  n_exp;
				+    wire [22:0] n_inf_nan_frac;
				+    wire [27:0] n_frac;
				+
				+    // exe1: alignment stage
				+    fadd_align alignment (
				+        .a(a), .b(b), .sub(sub),
				+        .s_is_nan(a_is_nan), .s_is_inf(a_is_inf),
				+        .inf_nan_frac(a_inf_nan_frac),
				+        .sign(a_sign), .temp_exp(a_exp), .op_sub(a_op_sub),
				+        .large_frac24(a_large_frac), .small_frac27(a_small_frac));
				+
				+    // pipeline registers: alignment -> calculation
				+    reg_align_cal reg_ac (
				+        .a_rm(rm),
				+        .a_is_nan(a_is_nan), .a_is_inf(a_is_inf),
				+        .a_inf_nan_frac(a_inf_nan_frac),
				+        .a_sign(a_sign), .a_exp(a_exp), .a_op_sub(a_op_sub),
				+        .a_large_frac(a_large_frac), .a_small_frac(a_small_frac),
				+        .clk(clk), .clrn(clrn), .e(e),
				+        .c_rm(c_rm),
				+        .c_is_nan(c_is_nan), .c_is_inf(c_is_inf),
				+        .c_inf_nan_frac(c_inf_nan_frac),
				+        .c_sign(c_sign), .c_exp(c_exp), .c_op_sub(c_op_sub),
				+        .c_large_frac(c_large_frac), .c_small_frac(c_small_frac));
				+
				+    // exe2: calculation stage
				+    fadd_cal calculation (
				+        .op_sub(c_op_sub),
				+        .large_frac24(c_large_frac), .small_frac27(c_small_frac),
				+        .cal_frac(c_frac));
				+
				+    // pipeline registers: calculation -> normalisation
				+    reg_cal_norm reg_cn (
				+        .c_rm(c_rm),
				+        .c_is_nan(c_is_nan), .c_is_inf(c_is_inf),
				+        .c_inf_nan_frac(c_inf_nan_frac),
				+        .c_sign(c_sign), .c_exp(c_exp), .c_frac(c_frac),
				+        .clk(clk), .clrn(clrn), .e(e),
				+        .n_rm(n_rm),
				+        .n_is_nan(n_is_nan), .n_is_inf(n_is_inf),
				+        .n_inf_nan_frac(n_inf_nan_frac),
				+        .n_sign(n_sign), .n_exp(n_exp), .n_frac(n_frac));
				+
				+    // exe3: normalization stage
				+    fadd_norm normalization (
				+        .rm(n_rm),
				+        .is_nan(n_is_nan), .is_inf(n_is_inf),
				+        .inf_nan_frac(n_inf_nan_frac),
				+        .sign(n_sign), .temp_exp(n_exp), .cal_frac(n_frac),
				+        .s(s));
				+endmodule
			
 
				+
			
 
				+//==============================================
			
 
				+// the alignment stage.
			
 
				+module fadd_align (a,b,sub,s_is_nan,s_is_inf,inf_nan_frac,sign,temp_exp,
				+    op_sub,large_frac24,small_frac27); // alignment stage: orders the operands by magnitude, flags inf/NaN and pre-shifts the smaller fraction
				+    input [31:0] a,b; // operands: sign[31], exponent[30:23], fraction[22:0]
				+    input sub; // 1 requests a - b, 0 requests a + b
				+    output [26:0] small_frac27; // shifted smaller fraction: 26 bits plus a sticky bit
				+    output [23:0] large_frac24; // larger operand's fraction with its hidden bit
				+    output [22:0] inf_nan_frac; // NaN payload when s_is_nan, otherwise 0
				+    output [7:0] temp_exp; // exponent field of the larger operand
				+    output s_is_nan;
				+    output s_is_inf;
				+    output sign; // sign attached to the result
				+    output op_sub; // 1 when the magnitudes must be subtracted
				+    wire exchange = (b[30:0] > a[30:0]); // b has the larger magnitude
				+    wire [31:0] fp_large = exchange? b : a;
				+    wire [31:0] fp_small = exchange? a : b;
				+    wire fp_large_hidden_bit = |fp_large[30:23]; // 1 for any non-zero exponent
				+    wire fp_small_hidden_bit = |fp_small[30:23];
				+    wire [23:0] large_frac24 = {fp_large_hidden_bit,fp_large[22:0]};
				+    wire [23:0] small_frac24 = {fp_small_hidden_bit,fp_small[22:0]};
				+
				+    assign temp_exp = fp_large[30:23];
				+    assign sign = exchange? sub ^ b[31] : a[31]; // b's sign is flipped by sub when b is the larger operand
				+    assign op_sub = sub ^ fp_large[31] ^ fp_small[31]; // effective operation after folding in both signs
				+
				+    wire fp_large_expo_is_ff = &fp_large[30:23]; // exp == 0xff
				+    wire fp_small_expo_is_ff = &fp_small[30:23];
				+    wire fp_large_frac_is_00 = ~|fp_large[22:0]; // frac == 0x0
				+    wire fp_small_frac_is_00 = ~|fp_small[22:0];
				+    wire fp_large_is_inf=fp_large_expo_is_ff & fp_large_frac_is_00;
				+    wire fp_small_is_inf=fp_small_expo_is_ff & fp_small_frac_is_00;
				+    wire fp_large_is_nan=fp_large_expo_is_ff & ~fp_large_frac_is_00;
				+    wire fp_small_is_nan=fp_small_expo_is_ff & ~fp_small_frac_is_00;
				+    assign s_is_inf = fp_large_is_inf | fp_small_is_inf;
				+    wire s_is_nan = fp_large_is_nan | fp_small_is_nan | // either operand is a NaN, or ...
				+        ((sub ^ fp_small[31] ^ fp_large[31]) & // ... an effective subtraction ...
				+            fp_large_is_inf & fp_small_is_inf); // ... of two infinities
				+    wire [22:0] nan_frac = (a[21:0] > b[21:0])? // the larger low-22-bit payload is kept
				+        {1'b1,a[21:0]} : {1'b1,b[21:0]}; // with fraction bit 22 forced to 1
				+    assign inf_nan_frac = s_is_nan? nan_frac : 23'h0;
				+    wire [7:0] exp_diff = fp_large[30:23] - fp_small[30:23];
				+    wire small_den_only = (fp_large[30:23] != 0) & // only the smaller operand has a zero exponent field
				+        (fp_small[30:23] == 0);
				+    wire [7:0] shift_amount = small_den_only? exp_diff - 8'h1 : exp_diff; // one less shift in the small_den_only case
				+    wire [49:0] small_frac50 = (shift_amount >= 26)? // shifts of 26 or more leave only sticky information
				+        {26'h0,small_frac24} :
				+        {small_frac24,26'h0} >> shift_amount;
				+    assign small_frac27 = {small_frac50[49:24],|small_frac50[23:0]}; // low 24 bits are ORed into one sticky bit
				+endmodule
			
 
				+
			
 
				+//======================================================================
			
 
				+// pipeline registers in between the alignment and calculation stages
			
 
				+module reg_align_cal (a_rm,a_is_nan,a_is_inf,a_inf_nan_frac,a_sign,a_exp,
				+    a_op_sub,a_large_frac,a_small_frac,clk,clrn,e,c_rm,
				+    c_is_nan,c_is_inf,c_inf_nan_frac,c_sign,c_exp,
				+    c_op_sub,c_large_frac,c_small_frac); // pipeline regs
				+    // Register bank between the alignment and calculation stages.
				+    // Asynchronous active-low clear; values are captured only while e is high.
				+    input clk, clrn; // clock and reset
				+    input e; // e: enable
				+    input [1:0] a_rm;
				+    input a_is_nan, a_is_inf, a_sign, a_op_sub;
				+    input [7:0] a_exp;
				+    input [22:0] a_inf_nan_frac;
				+    input [23:0] a_large_frac;
				+    input [26:0] a_small_frac;
				+    output reg [1:0] c_rm;
				+    output reg c_is_nan,c_is_inf,c_sign,c_op_sub;
				+    output reg [7:0] c_exp;
				+    output reg [22:0] c_inf_nan_frac;
				+    output reg [23:0] c_large_frac;
				+    output reg [26:0] c_small_frac;
				+
				+    always @ (posedge clk or negedge clrn) begin
				+        if (!clrn)
				+            // Clear every stage register at once.
				+            {c_rm, c_is_nan, c_is_inf, c_inf_nan_frac, c_sign,
				+             c_exp, c_op_sub, c_large_frac, c_small_frac} <= 0;
				+        else if (e)
				+            // Field order on both sides is identical, so each value
				+            // lands in its same-named register.
				+            {c_rm, c_is_nan, c_is_inf, c_inf_nan_frac, c_sign,
				+             c_exp, c_op_sub, c_large_frac, c_small_frac} <=
				+            {a_rm, a_is_nan, a_is_inf, a_inf_nan_frac, a_sign,
				+             a_exp, a_op_sub, a_large_frac, a_small_frac};
				+    end
				+endmodule
			
 
				+
			
 
				+// ===========================================================================
			
 
				+// the calculation stage.
			
 
				+module fadd_cal (op_sub,large_frac24,small_frac27, cal_frac); // calculation
				+    // Adds or subtracts the two aligned fractions.
				+    input op_sub;               // 1: large - small, 0: large + small
				+    input [23:0] large_frac24;
				+    input [26:0] small_frac27;
				+    output [27:0] cal_frac;
				+    // Bring both operands to 28 bits: a zero on top leaves room for a carry,
				+    // and the larger fraction gets three zero bits below it to line up with
				+    // the extra low bits of the smaller one.
				+    wire [27:0] big_operand = {1'b0,large_frac24,3'b000};
				+    wire [27:0] little_operand = {1'b0,small_frac27};
				+    wire [27:0] frac_sum = big_operand + little_operand;
				+    wire [27:0] frac_diff = big_operand - little_operand;
				+    assign cal_frac = op_sub ? frac_diff : frac_sum;
				+endmodule
			
 
				+
			
 
				+module reg_cal_norm (c_rm,c_is_nan,c_is_inf,c_inf_nan_frac,c_sign,c_exp,
				+    c_frac,clk,clrn,e,n_rm,n_is_nan,n_is_inf,
				+    n_inf_nan_frac,n_sign,n_exp,n_frac); // pipeline regs
				+    // Register bank between the calculation and normalisation stages.
				+    // Asynchronous active-low clear; values are captured only while e is high.
				+    input clk, clrn; // clock and reset
				+    input e; // e: enable
				+    input [1:0] c_rm;
				+    input c_is_nan, c_is_inf, c_sign;
				+    input [7:0] c_exp;
				+    input [22:0] c_inf_nan_frac;
				+    input [27:0] c_frac;
				+    output reg [1:0] n_rm;
				+    output reg n_is_nan,n_is_inf,n_sign;
				+    output reg [7:0] n_exp;
				+    output reg [22:0] n_inf_nan_frac;
				+    output reg [27:0] n_frac;
				+    always @ (posedge clk or negedge clrn) begin
				+        if (!clrn)
				+            // Clear every stage register at once.
				+            {n_rm, n_is_nan, n_is_inf, n_inf_nan_frac,
				+             n_sign, n_exp, n_frac} <= 0;
				+        else if (e)
				+            // Field order on both sides is identical, so each value
				+            // lands in its same-named register.
				+            {n_rm, n_is_nan, n_is_inf, n_inf_nan_frac,
				+             n_sign, n_exp, n_frac} <=
				+            {c_rm, c_is_nan, c_is_inf, c_inf_nan_frac,
				+             c_sign, c_exp, c_frac};
				+    end
				+endmodule
			
 
				+
			
 
				+//=================================================================
			
 
				+// normalization
			
 
				+
			
 
				+module fadd_norm (rm,is_nan,is_inf,inf_nan_frac,sign,temp_exp,cal_frac,s); // normalisation and rounding stage (purely combinational)
				+    input [27:0] cal_frac; // raw sum/difference; bit 27 is the carry out of the addition
				+    input [22:0] inf_nan_frac; // NOTE(review): only referenced by the commented-out final_result below
				+    input [7:0] temp_exp; // exponent before normalisation
				+    input [1:0] rm; // rounding mode, decoded in frac_plus_1
				+    input is_nan,is_inf; // NOTE(review): only referenced by the commented-out final_result below
				+    input sign;
				+    output [31:0] s;
				+    wire [26:0] f4,f3,f2,f1,f0; // cal_frac[26:0] after each conditional left shift
				+    wire [4:0] zeros; // number of leading zeros removed, built from the 16/8/4/2/1 steps
				+    assign zeros[4] = ~|cal_frac[26:11]; // 16-bit 0
				+    assign f4 = zeros[4]? {cal_frac[10:0],16'b0} : cal_frac[26:0];
				+    assign zeros[3] = ~|f4[26:19]; // 8-bit 0
				+    assign f3 = zeros[3]? {f4[18:0], 8'b0} : f4;
				+    assign zeros[2] = ~|f3[26:23]; // 4-bit 0
				+    assign f2 = zeros[2]? {f3[22:0], 4'b0} : f3;
				+    assign zeros[1] = ~|f2[26:25]; // 2-bit 0
				+    assign f1 = zeros[1]? {f2[24:0], 2'b0} : f2;
				+    assign zeros[0] = ~f1[26]; // 1-bit 0
				+    assign f0 = zeros[0]? {f1[25:0], 1'b0} : f1;
				+    reg [26:0] frac0; // normalised fraction; bits [2:0] feed the rounding decision
				+    reg [7:0] exp0; // exponent after normalisation, before rounding
				+    always @ * begin
				+        if (cal_frac[27]) begin // carry out: shift right once (cal_frac[0] is dropped) and bump the exponent
				+            frac0 = cal_frac[27:1]; // 1x.xxxxxxxxxxxxxxxxxxxxxxx xxx
				+            exp0 = temp_exp + 8'h1; // 1.xxxxxxxxxxxxxxxxxxxxxxx xxx
				+        end else begin
				+            if ((temp_exp > zeros) && (f0[26])) begin // a normalized number
				+                exp0 = temp_exp - zeros;
				+                frac0 = f0; // 01.xxxxxxxxxxxxxxxxxxxxxxx xxx
				+            end else begin // is a denormalized number or 0
				+                exp0 = 0;
				+                if (temp_exp != 0) // (e - 127) = ((e - 1) - 126)
				+                    frac0 = cal_frac[26:0] << (temp_exp - 8'h1);
				+                else frac0 = cal_frac[26:0];
				+            end
				+        end
				+    end
				+    wire frac_plus_1 = // for rounding
				+    ~rm[1] & ~rm[0] & frac0[2] & (frac0[1] | frac0[0]) | // rm=00: discarded part above one half
				+        ~rm[1] & ~rm[0] & frac0[2] & ~frac0[1] & ~frac0[0] & frac0[3] | // rm=00: exactly one half and kept LSB odd
				+            ~rm[1] & rm[0] & (frac0[2] | frac0[1] | frac0[0]) & sign | // rm=01: inexact and sign set
				+                rm[1] & ~rm[0] & (frac0[2] | frac0[1] | frac0[0]) & ~sign; // rm=10: inexact and sign clear; rm=11 never increments
				+    wire [24:0] frac_round = {1'b0,frac0[26:3]} + frac_plus_1;
				+    wire [7:0] exponent = frac_round[24]? exp0 + 8'h1 : exp0; // rounding carry moves the result up one exponent
				+    wire overflow = &exp0 | &exponent; // NOTE(review): computed but unused while final_result is commented out
				+    assign s = {sign,exponent,frac_round[22:0]}; // packs the rounded result directly; no inf/NaN/overflow substitution
				+    // assign s = final_result(overflow, rm, sign, is_nan, is_inf, exponent,
				+    //     frac_round[22:0], inf_nan_frac);
				+    // function [31:0] final_result;
				+    //     input overflow;
				+    //     input [1:0] rm;
				+    //     input sign, is_nan, is_inf;
				+    //     input [7:0] exponent;
				+    //     input [22:0] fraction, inf_nan_frac;
				+    //     casex ({overflow, rm, sign, is_nan, is_inf})
				+    //         6'b1_00_x_0_x : final_result = {sign,8'hff,23'h000000}; // inf
				+    //         6'b1_01_0_0_x : final_result = {sign,8'hfe,23'h7fffff}; // max
				+    //         6'b1_01_1_0_x : final_result = {sign,8'hff,23'h000000}; // inf
				+    //         6'b1_10_0_0_x : final_result = {sign,8'hff,23'h000000}; // inf
				+    //         6'b1_10_1_0_x : final_result = {sign,8'hfe,23'h7fffff}; // max
				+    //         6'b1_11_x_0_x : final_result = {sign,8'hfe,23'h7fffff}; // max
				+    //         6'b0_xx_x_0_0 : final_result = {sign,exponent,fraction}; // nor
				+    //         6'bx_xx_x_1_x : final_result = {1'b1,8'hff,inf_nan_frac}; // nan
				+    //         6'bx_xx_x_0_1 : final_result = {sign,8'hff,inf_nan_frac}; // inf
				+    //         default : final_result = {sign,8'h00,23'h000000}; // 0
				+    //     endcase
				+    // endfunction : final_result
				+endmodule
			
--- a/src/fpu32p/fpu32p.sv
+++ b/src/fpu32p/fpu32p.sv
@@ -1,6 +1,10 @@
 
				 `include "mult32.v"
			
 
				-`include "add32.sv"
			
 
				+// `include "add32.sv"
			
 
				+// `include "add32b.sv"
			
 
				+`include "add32c.sv"
			
 
				 
			
 
				+`define PP_FP_MULT 12
			
 
				+`define PP_FP_ADDER 1
			
 
				 
			
 
				 module fpu32p_tb;
			
 
				 
			
@@ -15,11 +19,22 @@ module fpu32p_tb;
 
				     assign {w_exp_sign, w_exp_exp, w_exp_man} = expected_add;
			
 
				     assign {w_res_sign, w_res_exp, w_res_man} = result_add;
			
 
				 
			
 
				-    adder_32 adder1(
			
 
				+    // adder_32 adder1(
			
 
				+    //     .clk(clk),
			
 
				+    //     .a(input_a),
			
 
				+    //     .b(input_b),
			
 
				+    //     .out(result_add)
			
 
				+    // );
			
 
				+    // pipelined_fadder adder1(result_add, input_a, input_b, clk);
			
 
				+    adder_32c adder0(
			
 
				         .clk(clk),
			
 
				+        .clrn(~reset),
			
 
				         .a(input_a),
			
 
				         .b(input_b),
			
 
				-        .out(result_add)
			
 
				+        .sub(1'b0),
			
 
				+        .rm(2'b00),
			
 
				+        .s(result_add),
			
 
				+        .e(1'b1)
			
 
				     );
			
 
				 
			
 
				     mult_32 multiplier1(
			
@@ -33,8 +48,8 @@ module fpu32p_tb;
 
				     );
			
 
				 
			
 
				     initial forever #5 clk = ~clk;
			
 
				-    localparam PIPELINES_ADD = 4;
			
 
				-    localparam PIPELINES_MUL = 12;
			
 
				+    localparam PIPELINES_ADD = `PP_FP_ADDER;
			
 
				+    localparam PIPELINES_MUL = `PP_FP_MULT;
			
 
				 
			
 
				     reg [31:0] test_mem [29:0][3:0];
			
 
				 
			
@@ -50,8 +65,8 @@ module fpu32p_tb;
 
				         #5;
			
 
				         reset = 0;
			
 
				 
			
 
				-        input_a = 'hbec64dc6;
			
 
				-        input_b = 'h3ecc3194;
			
 
				+        input_b = 'hbec64dc6;
			
 
				+        input_a = 'h3ecc3194;
			
 
				 
			
 
				         expected_add = 'h3c3c79c0;
			
 
				         expected_mult = 0;
			
--- a/src/neural/adder_casc_tb.sv
+++ b/src/neural/adder_casc_tb.sv
@@ -0,0 +1,51 @@
 
				+module adder_casc_tb(); // testbench instantiating adder_casc (handshaked) and adder_casc_p on the same inputs
				+    logic clk, rst;
				+
				+    localparam K=2; // both instances get 2**K = 4 input words
				+    logic [31:0] x [2**K-1:0]; // shared input vector
				+    logic [31:0] y0, y1; // y0: adder_casc output, y1: adder_casc_p output (y1 is never checked below)
				+    logic ack [2**K-1:0]; // per-input ack copied out of input_ios
				+    logic stb [2**K-1:0]; // per-input strobe driven by the stimulus block
				+
				+    abus_io input_ios[2**K-1:0]();
				+    abus_io output_io();
				+
				+    genvar k;
				+    generate
				+        for(k=0; k<2**K; k++) begin : io_mapper // bridge the plain stb/ack arrays to the interface instances
				+            assign input_ios[k].stb = stb[k];
				+            assign ack[k] = input_ios[k].ack;
				+        end
				+    endgenerate
				+
				+
				+    adder_casc#(.K(K)) adder_casc0(.clk(clk), .rst(rst), .x(x), .y(y0), .left(input_ios), .right(output_io.left));
				+
				+    adder_casc_p#(.K(K)) adder_casc1(.clk(clk), .rst(rst), .x(x), .y(y1));
				+
				+    initial forever #5 clk = ~clk; // clock toggles every 5 time units
				+    initial begin
				+
				+        $display("Testing adder_casc");
				+        clk = 0;
				+        rst = 1;
				+
				+        foreach(stb[i]) stb[i] = 0;
				+        output_io.ack = 0;
				+        // Fixed stimulus words are used; the generated pattern on the next line is disabled.
				+        // foreach(x[i]) x[i] = ('h400 + (i*8)) << 20;
				+        x = {'h41ea6000, 'h42ea6000, 'h411ba000, 'h413cc000};
				+
				+        #10;
				+        rst = 0; // release reset, then strobe all inputs for 20 time units
				+        foreach(stb[i]) stb[i] = 1;
				+        #20;
				+        foreach(stb[i]) stb[i] = 0;
				+        wait(output_io.stb == 1); // block until the output side raises its strobe
				+        output_io.ack = 1;
				+        assert(y0[0] == 'h47ffff00); // NOTE(review): y0[0] is one bit compared with a 32-bit literal, so this cannot match; presumably the whole of y0 was meant - confirm the expected value too
				+        wait(output_io.stb == 0); // complete the handshake before dropping ack
				+        output_io.ack = 0;
				+    end
				+
				+endmodule : adder_casc_tb
			
--- a/src/neural/comp.sv
+++ b/src/neural/comp.sv
@@ -172,7 +172,7 @@ module adder_casc_p #(parameter K, N=32)(clk, rst, x, y);
 
				                 // Middle layer
			
 
				             else begin
			
 
				                 for(j=0; j<2**(K-i-1); j++) begin : gen_mid_layer
			
 
				-                    localparam s = $floor((2.0**(K-1.0) * (2.0**(i-1)-1.0)/2.0**(i-1))+j);
			
 
				+                    localparam s = $floor((2.0**(K-1.0) * \c(2.0**(i-1)-1.0)/2.0**(i-1))+j);
			
 
				                     localparam ix = s*2;
			
 
				                     localparam ix1 = s*2+1;
			
 
				                     localparam iy = s+2**(K-1);
			
@@ -189,57 +189,6 @@ module adder_casc_p #(parameter K, N=32)(clk, rst, x, y);
 
				     endgenerate
			
 
				 endmodule : adder_casc_p
			
 
				 
			
 
				-module adder_casc_tb();
			
 
				-    logic clk, rst;
			
 
				-    
			
 
				-    localparam K=2;
			
 
				-    logic [31:0] x [2**K-1:0];
			
 
				-    logic [31:0] y0, y1;
			
 
				-    logic ack [2**K-1:0];
			
 
				-    logic stb [2**K-1:0];
			
 
				-
			
 
				-    abus_io input_ios[2**K-1:0]();
			
 
				-    abus_io output_io();
			
 
				-    
			
 
				-    genvar k;
			
 
				-    generate
			
 
				-        for(k=0; k<2**K; k++) begin : io_mapper
			
 
				-            assign input_ios[k].stb = stb[k];
			
 
				-            assign ack[k] = input_ios[k].ack;
			
 
				-        end
			
 
				-    endgenerate
			
 
				-    
			
 
				-    
			
 
				-    adder_casc#(.K(K)) adder_casc0(.clk(clk), .rst(rst), .x(x), .y(y0), .left(input_ios), .right(output_io.left));
			
 
				-
			
 
				-    adder_casc_p#(.K(K)) adder_casc1(.clk(clk), .rst(rst), .x(x), .y(y1));
			
 
				-
			
 
				-    initial forever #5 clk = ~clk;
			
 
				-    initial begin
			
 
				-        
			
 
				-        $display("Testing adder_casc");
			
 
				-        clk = 0;
			
 
				-        rst = 1;
			
 
				-        
			
 
				-        foreach(stb[i]) stb[i] = 0;
			
 
				-        output_io.ack = 0;
			
 
				-        // Initialise with floating point 2**i
			
 
				-        // foreach(x[i]) x[i] = ('h400 + (i*8)) << 20;
			
 
				-        x = {'h41ea6000, 'h42ea6000, 'h411ba000, 'h413cc000};
			
 
				-
			
 
				-        #10;
			
 
				-        rst = 0;
			
 
				-        foreach(stb[i]) stb[i] = 1;
			
 
				-        #20;
			
 
				-        foreach(stb[i]) stb[i] = 0;
			
 
				-        wait(output_io.stb == 1);
			
 
				-        output_io.ack = 1;
			
 
				-        assert(y0[0] == 'h47ffff00);
			
 
				-        wait(output_io.stb == 0);
			
 
				-        output_io.ack = 0;
			
 
				-    end
			
 
				-    
			
 
				-endmodule : adder_casc_tb
			
 
				 
			
 
				 
			
 
				 
			
--- a/src/neural/layer.sv
+++ b/src/neural/layer.sv
@@ -85,83 +85,143 @@ ONE HOT -> ... [ ] ... [ ] ..
 
				 
			
 
				 */
			
 
				 
			
 
				-module neuron_network_tb;
			
 
				-    reg clk, rst;
			
 
				-    reg [31:0] x [3:0];
			
 
				-    reg [31:0] y [1:0];
			
 
				+module neural_network_encoder(clk, rst, x, y);
			
 
				+    input clk, rst;
			
 
				+    input [31:0] x;
			
 
				+    output [31:0] y;
			
 
				 
			
 
				-    abus_io left[3:0]();
			
 
				-    abus_io right[1:0]();
			
 
				+    reg [31:0] layer1_s [7:0];
			
 
				+    reg [31:0] layer2_s [7:0];
			
 
				+    reg [31:0] layer3_s [1:0];
			
 
				+    reg [31:0] y [1:0];
			
 
				 
			
 
				     reg [31:0] layer1_w [0:7][3:0];
			
 
				     reg [31:0] layer1_b [0:7];
			
 
				-    reg [31:0] layer1_o [7:0];
			
 
				-    abus_io layer1_io [7:0]();
			
 
				-
			
 
				     reg [31:0] layer2_w [0:7][7:0];
			
 
				     reg [31:0] layer2_b [7:0];
			
 
				-    reg [31:0] layer2_o [7:0];
			
 
				-    abus_io layer2_io [7:0]();
			
 
				-
			
 
				     reg [31:0] layer3_w [0:1][7:0];
			
 
				     reg [31:0] layer3_b [1:0];
			
 
				-    reg [31:0] layer3_o [1:0];
			
 
				-    abus_io layer3_io [1:0]();
			
 
				-
			
 
				-    logic y_stb;
			
 
				-    assign y_stb = right[0].stb & right[1].stb;
			
 
				 
			
 
				-    neuron_layer#(.C(2), .K(3)) layer1(
			
 
				+    neuron_layer_p#(.C(2), .K(3)) layer_s1(
			
 
				         .clk(clk),
			
 
				         .rst(rst),
			
 
				         .x(x),
			
 
				-        .y(layer1_o),
			
 
				+        .y(layer1_s),
			
 
				         .w(layer1_w),
			
 
				-        .b(layer1_b),
			
 
				-        .left(left),
			
 
				-        .right(layer1_io)
			
 
				+        .b(layer1_b)
			
 
				     );
			
 
				 
			
 
				-    neuron_layer#(.C(3), .K(3)) layer2(
			
 
				+    neuron_layer_p#(.C(3), .K(3)) layer_s2(
			
 
				         .clk(clk),
			
 
				         .rst(rst),
			
 
				-        .x(layer1_o),
			
 
				-        .y(layer2_o),
			
 
				+        .x(layer1_s),
			
 
				+        .y(layer2_s),
			
 
				         .w(layer2_w),
			
 
				-        .b(layer2_b),
			
 
				-        .left(layer1_io),
			
 
				-        .right(layer2_io)
			
 
				+        .b(layer2_b)
			
 
				     );
			
 
				 
			
 
				-    neuron_layer#(.C(3), .K(1)) layer3(
			
 
				+    neuron_layer_p#(.C(3), .K(1)) layer_s3(
			
 
				         .clk(clk),
			
 
				         .rst(rst),
			
 
				-        .x(layer2_o),
			
 
				-        .y(layer3_o),
			
 
				+        .x(layer2_s),
			
 
				+        .y(layer3_s),
			
 
				         .w(layer3_w),
			
 
				-        .b(layer3_b),
			
 
				-        .left(layer2_io),
			
 
				-        .right(layer3_io)
			
 
				+        .b(layer3_b)
			
 
				     );
			
 
				 
			
 
				-    hard_sigmoid sigmoid0(
			
 
				+    hard_sigmoid_p sigmoid_s0(
			
 
				         .clk(clk),
			
 
				         .rst(rst),
			
 
				-        .x(layer3_o[0]),
			
 
				-        .y(y[0]),
			
 
				-        .left(layer3_io[0]),
			
 
				-        .right(right[0])
			
 
				+        .x(layer3_s[0]),
			
 
				+        .y(ys[0])
			
 
				     );
			
 
				 
			
 
				-    hard_sigmoid sigmoid1(
			
 
				+    hard_sigmoid_p sigmoid_s1(
			
 
				         .clk(clk),
			
 
				         .rst(rst),
			
 
				-        .x(layer3_o[1]),
			
 
				-        .y(y[1]),
			
 
				-        .left(layer3_io[1]),
			
 
				-        .right(right[1])
			
 
				+        .x(layer3_s[1]),
			
 
				+        .y(ys[1])
			
 
				     );
			
 
				 
			
 
				+endmodule : neural_network_encoder
			
 
				+
			
 
				+module neuron_network_tb;
			
 
				+    reg clk, rst;
			
 
				+    reg [31:0] x [3:0];
			
 
				+    reg [31:0] y [1:0];
			
 
				+
			
 
				+    // abus_io left[3:0]();
			
 
				+    // abus_io right[1:0]();
			
 
				+    //
			
 
				+    // reg [31:0] layer1_w [0:7][3:0];
			
 
				+    // reg [31:0] layer1_b [0:7];
			
 
				+    // reg [31:0] layer1_o [7:0];
			
 
				+    // abus_io layer1_io [7:0]();
			
 
				+    //
			
 
				+    // reg [31:0] layer2_w [0:7][7:0];
			
 
				+    // reg [31:0] layer2_b [7:0];
			
 
				+    // reg [31:0] layer2_o [7:0];
			
 
				+    // abus_io layer2_io [7:0]();
			
 
				+    //
			
 
				+    // reg [31:0] layer3_w [0:1][7:0];
			
 
				+    // reg [31:0] layer3_b [1:0];
			
 
				+    // reg [31:0] layer3_o [1:0];
			
 
				+    // abus_io layer3_io [1:0]();
			
 
				+    //
			
 
				+    // logic y_stb;
			
 
				+    // assign y_stb = right[0].stb & right[1].stb;
			
 
				+    //
			
 
				+    // neuron_layer#(.C(2), .K(3)) layer1(
			
 
				+    //     .clk(clk),
			
 
				+    //     .rst(rst),
			
 
				+    //     .x(x),
			
 
				+    //     .y(layer1_o),
			
 
				+    //     .w(layer1_w),
			
 
				+    //     .b(layer1_b),
			
 
				+    //     .left(left),
			
 
				+    //     .right(layer1_io)
			
 
				+    // );
			
 
				+    //
			
 
				+    // neuron_layer#(.C(3), .K(3)) layer2(
			
 
				+    //     .clk(clk),
			
 
				+    //     .rst(rst),
			
 
				+    //     .x(layer1_o),
			
 
				+    //     .y(layer2_o),
			
 
				+    //     .w(layer2_w),
			
 
				+    //     .b(layer2_b),
			
 
				+    //     .left(layer1_io),
			
 
				+    //     .right(layer2_io)
			
 
				+    // );
			
 
				+    //
			
 
				+    // neuron_layer#(.C(3), .K(1)) layer3(
			
 
				+    //     .clk(clk),
			
 
				+    //     .rst(rst),
			
 
				+    //     .x(layer2_o),
			
 
				+    //     .y(layer3_o),
			
 
				+    //     .w(layer3_w),
			
 
				+    //     .b(layer3_b),
			
 
				+    //     .left(layer2_io),
			
 
				+    //     .right(layer3_io)
			
 
				+    // );
			
 
				+    //
			
 
				+    // hard_sigmoid sigmoid0(
			
 
				+    //     .clk(clk),
			
 
				+    //     .rst(rst),
			
 
				+    //     .x(layer3_o[0]),
			
 
				+    //     .y(y[0]),
			
 
				+    //     .left(layer3_io[0]),
			
 
				+    //     .right(right[0])
			
 
				+    // );
			
 
				+    //
			
 
				+    // hard_sigmoid sigmoid1(
			
 
				+    //     .clk(clk),
			
 
				+    //     .rst(rst),
			
 
				+    //     .x(layer3_o[1]),
			
 
				+    //     .y(y[1]),
			
 
				+    //     .left(layer3_io[1]),
			
 
				+    //     .right(right[1])
			
 
				+    // );
			
 
				+
			
 
				     /* ******************
			
 
				     Pipelined network
			
 
				     ********************/
			
@@ -175,7 +235,8 @@ module neuron_network_tb;
 
				         .rst(rst),
			
 
				         .x(x),
			
 
				         .y(layer1_s),
			
 
				-        .w(layer1_w), .b(layer1_b)
			
 
				+        .w(layer1_w),
			
 
				+        .b(layer1_b)
			
 
				     );
			
 
				 
			
 
				     neuron_layer_p#(.C(3), .K(3)) layer_s2(
			
@@ -183,7 +244,8 @@ module neuron_network_tb;
 
				         .rst(rst),
			
 
				         .x(layer1_s),
			
 
				         .y(layer2_s),
			
 
				-        .w(layer2_w), .b(layer2_b)
			
 
				+        .w(layer2_w),
			
 
				+        .b(layer2_b)
			
 
				     );
			
 
				 
			
 
				     neuron_layer_p#(.C(3), .K(1)) layer_s3(
			
@@ -191,7 +253,8 @@ module neuron_network_tb;
 
				         .rst(rst),
			
 
				         .x(layer2_s),
			
 
				         .y(layer3_s),
			
 
				-        .w(layer3_w), .b(layer3_b)
			
 
				+        .w(layer3_w),
			
 
				+        .b(layer3_b)
			
 
				     );
			
 
				 
			
 
				     hard_sigmoid_p sigmoid_s0(
			
--- a/src/neural/neural.sv
+++ b/src/neural/neural.sv
@@ -16,11 +16,15 @@ module neural_adder(clk, rst, x0, x1, y);
 
				     //     .clk(clk),
			
 
				     //     .reset(rst)
			
 
				     // );
			
 
				-    adder_32 adder0(
			
 
				+    adder_32c adder0(
			
 
				         .clk(clk),
			
 
				+        .clrn(~rst),
			
 
				         .a(x0),
			
 
				         .b(x1),
			
 
				-        .out(y)
			
 
				+        .sub(1'b0),
			
 
				+        .rm(2'b11),
			
 
				+        .s(y),
			
 
				+        .e(1'b1)
			
 
				     );
			
 
				 
			
 
				 endmodule : neural_adder
			
@@ -51,11 +55,11 @@ endmodule : neural_mult
 
				 module neural_comp_gt(x0, x1, y);
			
 
				     input [31:0] x0, x1;
			
 
				     output y;
			
 
				-    fpu32_gt gt0(x0, x1, y0);
			
 
				+    fpu32_gt gt0(x0, x1, y);
			
 
				 endmodule : neural_comp_gt
			
 
				 
			
 
				 module neural_comp_lt(x0, x1, y);
			
 
				     input [31:0] x0, x1;
			
 
				     output y;
			
 
				-    fpu32_lt lt0(x0, x1, y0);
			
 
				+    fpu32_lt lt0(x0, x1, y);
			
 
				 endmodule : neural_comp_lt
			
--- a/src/neural/sigmoid.sv
+++ b/src/neural/sigmoid.sv
@@ -1,4 +1,10 @@
 
				 `include "../fpu32/compare.sv"
			
 
				+`include "../fpu32p/fpu32p.sv"
			
 
				+
			
 
				+// synopsys translate_off
			
 
				+`timescale 1 ps / 1 ps
			
 
				+// synopsys translate_on
			
 
				+
			
 
				 
			
 
				 typedef enum logic [2:0] {
			
 
				     hs_input,
			
@@ -123,11 +129,35 @@ module hard_sigmoid #(parameter N=32)(clk, rst, x, y, left, right);
 
				 
			
 
				 endmodule : hard_sigmoid
			
 
				 
			
 
				+// pipeline_casc: N-bit wide, STAGES-deep shift register clocked on posedge clk.
			
 
				+//   clk : clock
			
 
				+//   rst : synchronous, active-high reset; clears every stage to 0
			
 
				+//   x   : value sampled into stage 0 on each rising clk edge
			
 
				+//   y   : contents of the last stage, i.e. x as sampled STAGES rising edges earlier
			
 
				+// STAGES must be at least 1.
			
 
				+module pipeline_casc #(parameter STAGES=5, N=32) (clk, rst, x, y);
			
 
				+    input clk, rst;
			
 
				+    input [N-1:0] x;
			
 
				+    output logic [N-1:0] y;
			
 
				+    reg [N-1:0] stages [STAGES-1:0];
			
 
				+
			
 
				+    assign y = stages[STAGES-1];
			
 
				+
			
 
				+    // One process shifts the whole chain. Stage 0 is loaded outside the loop
			
 
				+    // so that stages[i-1] is never elaborated with i == 0 (index -1).
			
 
				+    always_ff @(posedge clk) begin
			
 
				+        if(rst) begin
			
 
				+            for (int i=0; i<STAGES; i++) stages[i] <= 0;
			
 
				+        end else begin
			
 
				+            stages[0] <= x;
			
 
				+            for (int i=1; i<STAGES; i++) stages[i] <= stages[i-1];
			
 
				+        end
			
 
				+    end
			
 
				+
			
 
				+endmodule : pipeline_casc
			
 
				+
			
 
				 module hard_sigmoid_p #(parameter N=32)(clk, rst, x, y);
			
 
				     input clk, rst;
			
 
				     input [N-1:0] x;
			
 
				     output logic [N-1:0] y;
			
 
				-    logic [N-1:0] value, comp_result;
			
 
				+    logic [N-1:0] comp_result, value;
			
 
				 
			
 
				     logic gt_neg0, gt_neg1;
			
 
				     logic lt_pos0, lt_pos1;
			
@@ -147,37 +177,68 @@ module hard_sigmoid_p #(parameter N=32)(clk, rst, x, y);
 
				         .clk(clk),
			
 
				         .rst(rst),
			
 
				         .x0('h40200000),
			
 
				-        .x1(value),
			
 
				+        .x1(x),
			
 
				         .y(join_value)
			
 
				     );
			
 
				 
			
 
				+    pipeline_casc #(.STAGES(`PP_FP_MULT + `PP_FP_ADDER + 1), .N(N)) delay0(
			
 
				+        .clk(clk),
			
 
				+        .rst(rst),
			
 
				+        .x(x),
			
 
				+        .y(value)
			
 
				+    );
			
 
				+
			
 
				     neural_comp_gt gt0(value, 'hc0200000, gt_neg0); // more then -2.5
			
 
				     neural_comp_lt lt0(value, 'h40200000, lt_pos0); // less then +2.5
			
 
				 
			
 
				-    always_ff @(posedge clk) begin
			
 
				-        gt_neg1 <= gt_neg0;
			
 
				-        lt_pos1 <= lt_pos0;
			
 
				-        if(~gt_neg1) begin
			
 
				-            y <= 0;
			
 
				-        end
			
 
				-        // if in between -2.5 and 2.5
			
 
				-        else if(gt_neg1 & lt_pos1) begin
			
 
				-            y <= comp_result;
			
 
				-        end
			
 
				-        // if more than 2.5 ouput 1
			
 
				-        else begin
			
 
				-            y <= 'h3f800000;
			
 
				-        end
			
 
				+    always_comb begin
			
 
				+        if(~gt_neg1 | rst)
			
 
				+            y = 0;
			
 
				+        else if(gt_neg1 & lt_pos1)  // if in between -2.5 and 2.5
			
 
				+            y = comp_result;
			
 
				+        else  // if more than 2.5 ouput 1
			
 
				+            y = 'h3f800000;
			
 
				+    end
			
 
				 
			
 
				+    always_ff @(posedge clk) begin
			
 
				         if(rst) begin
			
 
				             gt_neg1 <= 0;
			
 
				             lt_pos1 <= 0;
			
 
				+        end else begin
			
 
				+            gt_neg1 <= gt_neg0;
			
 
				+            lt_pos1 <= lt_pos0;
			
 
				         end
			
 
				     end
			
 
				 
			
 
				 endmodule : hard_sigmoid_p
			
 
				 
			
 
				 
			
 
				+module hard_sigmoid_p_tb; // Stimulus-only testbench for hard_sigmoid_p: streams test vectors into x; y is not checked in this module
			
 
				+    reg rst, clk;
			
 
				+    reg [31:0] x;
			
 
				+    wire [31:0] y;
			
 
				+
			
 
				+    hard_sigmoid_p sigmoid0(clk, rst, x, y); // device under test, ports connected by position
			
 
				+
			
 
				+    reg [31:0] test_mem [5000:0]; // stimulus storage: 5001 32-bit words
			
 
				+    initial $readmemh("scripts/sigmoid_test.hex", test_mem); // load the vectors as hex words
			
 
				+
			
 
				+    initial forever #5 clk = ~clk; // toggle every 5 time units, i.e. a period of 10
			
 
				+    initial begin
			
 
				+        clk = 0;
			
 
				+        rst = 1; // start with reset asserted
			
 
				+        # 15;
			
 
				+        rst = 0; // NOTE(review): t=15 is also a rising clk edge (5, 15, 25, ...) - rst and x change on the edge; confirm this race is acceptable
			
 
				+        for (int i=0; i < $size(test_mem); i++) begin // walks all 5001 entries; NOTE(review): entries the hex file does not fill would be applied as X - verify file length
			
 
				+            x = test_mem[i];
			
 
				+            #10; // hold each vector for one clock period
			
 
				+        end
			
 
				+        $finish();
			
 
				+    end
			
 
				+
			
 
				+endmodule : hard_sigmoid_p_tb
			
 
				+
			
 
				+
			
 
				 module hard_sigmoid_tb;
			
 
				     reg rst, clk;
			
 
				     reg [31:0] x;
			
--- a/src/root.sv
+++ b/src/root.sv
@@ -2,6 +2,8 @@
 
				 `timescale 1 ps / 1 ps
			
 
				 // synopsys translate_on
			
 
				 
			
 
				+`include "neural/neural.sv"
			
 
				+
			
 
				 module root(
			
 
				     input  clk,
			
 
				     input  [1:0] keys,
			
@@ -21,6 +23,8 @@ module root(
 
				         .locked(pll_lock)
			
 
				     );
			
 
				 
			
 
				+    neural_network_encoder nn0(mclk, reset, 32'd0);
			
 
				+
			
 
				 endmodule : root