Min пре 4 година
родитељ
комит
2d55a99e3c
4 измењених фајлова са 284 додато и 10 уклоњено
  1. 53 3
      simulation/modelsim/wave_fpu16_tb.do
  2. 223 0
      src/fpu16/fp_mult.sv
  3. 5 5
      src/fpu16/fp_product.sv
  4. 3 2
      src/fpu16/fpu16.sv

+ 53 - 3
simulation/modelsim/wave_fpu16_tb.do

@@ -22,10 +22,60 @@ add wave -noupdate -label {RESULT ADD} -radix hexadecimal -childformat {{{/fpu16
 add wave -noupdate -label {EXPECTED ADD} -radix hexadecimal /fpu16_tb/expected_add
 add wave -noupdate -label {RESULT MULT} -radix hexadecimal /fpu16_tb/result_mult
 add wave -noupdate -label {EXPECTED MULT} -radix hexadecimal /fpu16_tb/expected_mult
+add wave -noupdate -expand -group Multipler -radix unsigned /fpu16_tb/multiplier1/input_a
+add wave -noupdate -expand -group Multipler -radix unsigned /fpu16_tb/multiplier1/input_b
+add wave -noupdate -expand -group Multipler -expand -group {stage 0} -radix unsigned /fpu16_tb/multiplier1/a_e0
+add wave -noupdate -expand -group Multipler -expand -group {stage 0} -radix unsigned /fpu16_tb/multiplier1/b_e0
+add wave -noupdate -expand -group Multipler -expand -group {stage 0} -radix unsigned /fpu16_tb/multiplier1/a_m0
+add wave -noupdate -expand -group Multipler -expand -group {stage 0} -radix unsigned /fpu16_tb/multiplier1/b_m0
+add wave -noupdate -expand -group Multipler -expand -group {stage 0} -radix unsigned /fpu16_tb/multiplier1/a_s0
+add wave -noupdate -expand -group Multipler -expand -group {stage 0} -radix unsigned /fpu16_tb/multiplier1/b_s0
+add wave -noupdate -expand -group Multipler -expand -group {stage 1} -radix unsigned /fpu16_tb/multiplier1/a_e1
+add wave -noupdate -expand -group Multipler -expand -group {stage 1} -radix unsigned /fpu16_tb/multiplier1/b_e1
+add wave -noupdate -expand -group Multipler -expand -group {stage 1} -radix unsigned /fpu16_tb/multiplier1/a_m1
+add wave -noupdate -expand -group Multipler -expand -group {stage 1} -radix unsigned /fpu16_tb/multiplier1/b_m1
+add wave -noupdate -expand -group Multipler -expand -group {stage 1} -radix unsigned /fpu16_tb/multiplier1/z_s1
+add wave -noupdate -expand -group Multipler -expand -group {stage 1} -radix unsigned /fpu16_tb/multiplier1/a_z1w
+add wave -noupdate -expand -group Multipler -expand -group {stage 1} -radix unsigned /fpu16_tb/multiplier1/b_z1w
+add wave -noupdate -expand -group Multipler -expand -group {stage 1} -radix unsigned /fpu16_tb/multiplier1/a_z1
+add wave -noupdate -expand -group Multipler -expand -group {stage 1} -radix unsigned /fpu16_tb/multiplier1/b_z1
+add wave -noupdate -expand -group Multipler -expand -group {stage 2} -radix unsigned /fpu16_tb/multiplier1/a_e2
+add wave -noupdate -expand -group Multipler -expand -group {stage 2} -radix unsigned /fpu16_tb/multiplier1/b_e2
+add wave -noupdate -expand -group Multipler -expand -group {stage 2} -radix unsigned /fpu16_tb/multiplier1/a_m2
+add wave -noupdate -expand -group Multipler -expand -group {stage 2} -radix unsigned /fpu16_tb/multiplier1/b_m2
+add wave -noupdate -expand -group Multipler -expand -group {stage 2} -radix unsigned /fpu16_tb/multiplier1/z_s2
+add wave -noupdate -expand -group Multipler -expand -group {stage 3} -radix unsigned /fpu16_tb/multiplier1/z_s3
+add wave -noupdate -expand -group Multipler -expand -group {stage 3} -radix unsigned /fpu16_tb/multiplier1/z_e3
+add wave -noupdate -expand -group Multipler -expand -group {stage 3} -radix unsigned /fpu16_tb/multiplier1/z_p3
+add wave -noupdate -expand -group Multipler -expand -group {stage 3} -radix unsigned /fpu16_tb/multiplier1/z_p3w
+add wave -noupdate -expand -group Multipler -expand -group {stage 3} -radix unsigned /fpu16_tb/multiplier1/p3
+add wave -noupdate -expand -group Multipler -expand -group {stage 4} -radix unsigned /fpu16_tb/multiplier1/z_s4
+add wave -noupdate -expand -group Multipler -expand -group {stage 4} -radix unsigned /fpu16_tb/multiplier1/z_e4
+add wave -noupdate -expand -group Multipler -expand -group {stage 4} -radix unsigned /fpu16_tb/multiplier1/z_m4
+add wave -noupdate -expand -group Multipler -expand -group {stage 4} -radix unsigned /fpu16_tb/multiplier1/z_bits4
+add wave -noupdate -expand -group Multipler -expand -group {stage 4} -radix unsigned /fpu16_tb/multiplier1/z_sticky4
+add wave -noupdate -expand -group Multipler -expand -group {stage 4} -radix unsigned /fpu16_tb/multiplier1/z_z4w
+add wave -noupdate -expand -group Multipler -expand -group {stage 4} -radix unsigned /fpu16_tb/multiplier1/z_z4
+add wave -noupdate -expand -group Multipler -expand -group {stage 5} -radix unsigned /fpu16_tb/multiplier1/z_s5
+add wave -noupdate -expand -group Multipler -expand -group {stage 5} -radix unsigned /fpu16_tb/multiplier1/z_e5
+add wave -noupdate -expand -group Multipler -expand -group {stage 5} -radix unsigned /fpu16_tb/multiplier1/z_m5
+add wave -noupdate -expand -group Multipler -expand -group {stage 5} -radix unsigned /fpu16_tb/multiplier1/z_bits5
+add wave -noupdate -expand -group Multipler -expand -group {stage 5} -radix unsigned /fpu16_tb/multiplier1/z_sticky5
+add wave -noupdate -expand -group Multipler -expand -group {stage 6} -radix unsigned /fpu16_tb/multiplier1/z_s6
+add wave -noupdate -expand -group Multipler -expand -group {stage 6} -radix unsigned /fpu16_tb/multiplier1/z_e6
+add wave -noupdate -expand -group Multipler -expand -group {stage 6} -radix unsigned /fpu16_tb/multiplier1/z_m6
+add wave -noupdate -expand -group Multipler -expand -group {stage 6} -radix unsigned /fpu16_tb/multiplier1/z_bits6
+add wave -noupdate -expand -group Multipler -expand -group {stage 6} -radix unsigned /fpu16_tb/multiplier1/z_sticky6
+add wave -noupdate -expand -group Multipler -expand -group {stage 7} -radix unsigned /fpu16_tb/multiplier1/z_s7
+add wave -noupdate -expand -group Multipler -expand -group {stage 7} -color Coral -radix unsigned /fpu16_tb/exp1_s
+add wave -noupdate -expand -group Multipler -expand -group {stage 7} -radix unsigned /fpu16_tb/multiplier1/z_e7
+add wave -noupdate -expand -group Multipler -expand -group {stage 7} -color Coral -radix unsigned /fpu16_tb/exp1_e
+add wave -noupdate -expand -group Multipler -expand -group {stage 7} -radix unsigned /fpu16_tb/multiplier1/z_m7
+add wave -noupdate -expand -group Multipler -expand -group {stage 7} -color Coral -radix unsigned /fpu16_tb/exp1_m
 TreeUpdate [SetDefaultTree]
-WaveRestoreCursors {{Cursor 1} {222 ps} 0}
+WaveRestoreCursors {{Cursor 1} {82 ps} 0}
 quietly wave cursor active 1
-configure wave -namecolwidth 204
+configure wave -namecolwidth 236
 configure wave -valuecolwidth 100
 configure wave -justifyvalue left
 configure wave -signalnamewidth 0
@@ -39,4 +89,4 @@ configure wave -griddelta 40
 configure wave -timeline 0
 configure wave -timelineunits ns
 update
-WaveRestoreZoom {0 ps} {59 ps}
+WaveRestoreZoom {56 ps} {114 ps}

+ 223 - 0
src/fpu16/fp_mult.sv

@@ -0,0 +1,223 @@
+
+
+// Leading-zero counter.
+//
+// Parameters:
+//   K   - width of the input vector (default 10).
+// Ports:
+//   in  - K-bit vector to examine.
+//   out - number of consecutive zero bits starting at in[K-1];
+//         0 when in[K-1] is set, K when the whole vector is zero.
+//
+// The count is produced by a width-generic priority scan instead of a
+// 10-bit-only casez table, so the module is correct for any K.
+module lead_zeros_enc #(parameter K=10)(in, out);
+    // Enough bits to represent every count in 0..K (4 bits for K=10).
+    localparam L = $clog2(K+1);
+
+    input wire [K-1:0] in;
+    output logic [L-1:0] out;
+
+    always_comb begin
+        // Default: no bit set, every position is a leading zero.
+        out = L'(K);
+        // Scan from LSB to MSB; the last assignment wins, so the result
+        // reflects the most significant set bit.
+        for (int i = 0; i < K; i++) begin
+            if (in[i]) begin
+                out = L'(K - 1 - i);
+            end
+        end
+    end
+endmodule : lead_zeros_enc
+
+// Pipelined floating-point multiplier (work in progress).
+//
+// Parameters:
+//   N - total word width (default 16).
+//   M - exponent field width (default 5).
+// Ports:
+//   input_a, input_b - operands laid out as {sign, M-bit exponent,
+//                      K-bit mantissa} with K = N-M-1.
+//   output_z         - result in the same layout, taken from the stage 7
+//                      registers.
+//   clk              - every stage register updates on the rising edge.
+//   reset            - declared for interface compatibility; not used by
+//                      any logic in this module.
+//
+// Stages 0..7 each add one register level between input and output.
+module fp_mult #(parameter N=16, M=5)(input_a, input_b, output_z, clk, reset);
+    localparam K=N-M-1;  // Size of mantissa
+    localparam L=$clog2(K+1);  // Bits needed for a leading-zero count in 0..K
+
+    input logic [N-1:0] input_a, input_b;
+    input logic clk, reset;
+    output logic [N-1:0] output_z;
+
+    // ====================
+    // Stage 0 store input
+    // ====================
+
+    reg [M-1:0] a_e0, b_e0;
+    reg [K-1:0] a_m0, b_m0;
+    reg  a_s0, b_s0;
+
+    // Split both operands into sign / exponent / mantissa fields.
+    always_ff @(posedge clk) begin
+        {a_s0, a_e0, a_m0} <= input_a;
+        {b_s0, b_e0, b_m0} <= input_b;
+    end
+
+    // ====================
+    // Stage 1 denormalise0
+    // ====================
+
+    reg [M-1:0] a_e1, b_e1;
+    reg [K-1:0] a_m1, b_m1;
+    reg z_s1;
+
+    // leading zeros
+    wire [L-1:0] a_z1w, b_z1w;
+    reg [L-1:0] a_z1, b_z1;
+
+    // The counters read the stage 0 mantissas so that a_z1/b_z1 are
+    // registered on the same edge as a_m1/b_m1 and describe the same
+    // operand. Counting on a_m1/b_m1 would make the registered count lag
+    // the mantissa it is applied to by one cycle.
+    lead_zeros_enc #(.K(K)) lz_a1(a_m0, a_z1w);
+    lead_zeros_enc #(.K(K)) lz_b1(b_m0, b_z1w);
+
+    always_ff @(posedge clk) begin
+        // Only the product sign is carried forward from here on.
+        z_s1 <= a_s0 ^ b_s0;
+        {a_e1, a_m1} <= {a_e0, a_m0};
+        {b_e1, b_m1} <= {b_e0, b_m0};
+        a_z1 <= a_z1w;
+        b_z1 <= b_z1w;
+    end
+
+    // ====================
+    // Stage 2 denormalise1
+    // ====================
+
+    reg [M-1:0] a_e2, b_e2;
+    reg [K-1:0] a_m2, b_m2;
+    reg z_s2;
+
+    // Shift each mantissa left by its leading-zero count and lower the
+    // exponent by the same amount.
+    always_ff @(posedge clk) begin
+        a_e2 <= a_e1 - a_z1;
+        a_m2 <= a_m1 << a_z1;  // shift the mantissa (was shifting a_e1)
+
+        b_e2 <= b_e1 - b_z1;
+        b_m2 <= b_m1 << b_z1;
+
+        z_s2 <= z_s1;
+    end
+
+    // ====================
+    // Stage 3 product
+    // ====================
+
+    reg z_s3;
+    reg [M-1:0] z_e3;
+    reg [K*2-1:0] z_p3;  // product is double mantissa
+    wire [K*2-1:0] z_p3w;
+
+    assign z_p3w = a_m2 * b_m2;
+
+    always_ff @(posedge clk) begin
+        // NOTE(review): exponents are summed without subtracting a bias,
+        // and the M-bit result can wrap - confirm the intended format.
+        z_e3 <= a_e2 + b_e2 + 1;
+        z_p3 <= z_p3w;
+
+        z_s3 <= z_s2;
+    end
+
+    // ====================
+    // Stage 4 unpack/normalise0
+    // ====================
+
+    reg z_s4;
+    reg [M-1:0] z_e4;
+    reg [K-1:0] z_m4;
+    reg [1:0] z_bits4; // guard and round_bit
+    reg z_sticky4;
+
+
+    wire [K-1:0] p3;
+    // leading zeros
+    wire [L-1:0] z_z4w;
+    reg [L-1:0] z_z4;
+
+    // Upper half of the product becomes the result mantissa.
+    assign p3 = z_p3[K*2-1:K];
+    lead_zeros_enc #(.K(K)) lz_p3(p3, z_z4w);
+
+    always_ff @(posedge clk) begin
+        z_m4 <= p3;
+        // The two product bits directly below the kept mantissa.
+        z_bits4 <= z_p3[K-1:K-2];
+        // NOTE(review): this range still includes z_p3[K-2], which is
+        // also stored in z_bits4[0] - confirm whether [K-3:0] is intended.
+        z_sticky4 <= z_p3[K-2:0] != 0;
+        z_z4 <= z_z4w;
+
+        z_e4 <= z_e3;
+        z_s4 <= z_s3;
+    end
+
+    // ====================
+    // Stage 5 normalise1
+    // ====================
+
+    reg z_s5;
+    reg [M-1:0] z_e5;
+    reg [K-1:0] z_m5;
+
+    reg [1:0] z_bits5; // guard and round_bit
+    reg z_sticky5;
+
+    always_ff @(posedge clk) begin
+        z_e5 <= z_e4 - z_z4;
+        z_m5 <= z_m4 << z_z4;
+
+        // Refill the low mantissa bits vacated by the shift from z_bits4.
+        // These partial non-blocking assignments come after the full
+        // z_m5 assignment above and therefore override those bits.
+        // NOTE(review): case 1 moves z_bits4[0] into the mantissa while
+        // cases 2+ treat z_bits4[1] as the more significant bit - confirm
+        // the intended bit order.
+        case (z_z4)
+            4'd0: begin
+                z_bits5 <= z_bits4;
+            end
+            4'd1: begin
+                z_m5[0] <= z_bits4[0];
+                z_bits5 <= {z_bits4[1], 1'b0};
+            end
+            4'd2: begin
+                z_m5[1-:2] <= z_bits4;
+                z_bits5 <= 2'b00;
+            end
+            default : begin
+                z_m5[z_z4-1-:2] <= z_bits4;
+                z_bits5 <= 2'b00;
+            end
+        endcase
+
+        z_sticky5 <= z_sticky4;
+        z_s5 <= z_s4;
+    end
+
+    // ====================
+    // Stage 6 normalise2
+    // ====================
+    reg z_s6;
+    reg [M-1:0] z_e6;
+    reg [K-1:0] z_m6;
+
+    reg [1:0] z_bits6;
+    reg z_sticky6;
+
+
+    // Currently a plain pipeline register: every value passes through
+    // unchanged until the shift logic sketched below is implemented.
+    always_ff @(posedge clk) begin
+        // TODO: Fix this stage
+        // if ($signed(z_e5) < -126) begin
+        //     shift6 <= $signed(z_e5) -126;
+        // end else begin
+        //     shift6 <= 0;
+        // end
+
+        z_s6 <= z_s5;
+        z_e6 <= z_e5;
+        z_m6 <= z_m5;
+        z_bits6 <= z_bits5;
+        z_sticky6 <= z_sticky5;
+    end
+
+    // ====================
+    // Stage 7 round
+    // ====================
+    reg z_s7;
+    reg [M-1:0] z_e7;
+    reg [K-1:0] z_m7;
+
+    always_ff @(posedge clk) begin
+        // Increment the mantissa when z_bits6[0] is set together with
+        // any of z_bits6[1], the sticky bit or the mantissa LSB.
+        // NOTE(review): z_bits6[0] is the lower of the two captured
+        // product bits - confirm it is the one meant to gate rounding.
+        if (z_bits6[0] && (z_bits6[1] | z_sticky6 | z_m6[0])) begin
+            z_m7 <= z_m6 + 1;
+            // TODO: Check for all 1s
+        end else begin
+            z_m7 <= z_m6;
+        end
+        z_e7 <= z_e6;
+        z_s7 <= z_s6;
+    end
+
+    // ====================
+    // Stage 8 pack
+    // ====================
+    // This stage is skipped as it checks for overflow
+
+
+    // Reassemble the result word from the stage 7 registers.
+    always_comb output_z = {z_s7, z_e7, z_m7};
+
+endmodule : fp_mult

+ 5 - 5
src/fpu16/fp_product.sv

@@ -63,6 +63,11 @@ module fp_product #(parameter N=16, M=5)(input_a, input_b, output_z, clk, reset)
 			z_m2 <= z_m1;
 			z_m3 <= z_m2;
 
+			z_s3 <= a_s2 ^ b_s2; //signs xor together
+			z_e3 <= a_e2 + b_e2 - (1<<M); // exponents added together subtracting one offset
+			z_p3 <= a_m2 * b_m2; // mantissa multiplied together and the most significant bits are stored in the output mantissa
+			z_m4 <= z_p3[2*K-1:K];
+
 			// If input a or input b is NaN then return NaN
 			if (((a_e0 ==(1<<M)) && (a_m0 != 0)) || ((b_e0 == (1<<M)) && (b_m0 != 0)))
 			begin
@@ -165,11 +170,6 @@ module fp_product #(parameter N=16, M=5)(input_a, input_b, output_z, clk, reset)
 				z_m4 <= z_m3;
 				z_p3 <= 0;
 			end
-
-            z_s3 <= a_s2 ^ b_s2; //signs xor together
-            z_e3 <= a_e2 + b_e2 - (1<<M); // exponents added together subtracting one offset
-            z_p3 <= a_m2 * b_m2; // mantissa multiplied together and the most significant bits are stored in the output mantissa
-            z_m4 <= z_p3[2*K-1:K];
 		end
 		else
 		begin

+ 3 - 2
src/fpu16/fpu16.sv

@@ -1,5 +1,6 @@
 `include "fp_adder.sv"
 `include "fp_product.sv"
+`include "fp_mult.sv"
 
 
 module fpu16_tb;
@@ -12,11 +13,11 @@ module fpu16_tb;
 
 	fp_adder adder1(.input_a(input_a), .input_b(input_b), .output_z(result_add), .clk(clk), .reset(reset));
 
-	fp_product multiplier1(.input_a(input_a), .input_b(input_b), .output_z(result_mult), .clk(clk), .reset(reset));
+	fp_mult multiplier1(.input_a(input_a), .input_b(input_b), .output_z(result_mult), .clk(clk), .reset(reset));
 	
 	initial forever #5 clk = ~clk;
 	localparam PIPELINES_ADD = 2;
-	localparam PIPELINES_MUL = 4;
+	localparam PIPELINES_MUL = 7;
 
 	reg [15:0] test_mem [29:0][3:0];