|
|
@@ -0,0 +1,223 @@
|
|
|
+
|
|
|
+
|
|
|
// Leading-zero counter.
//
// Parameters:
//   K   - width of the input vector (default 10).
// Ports:
//   in  - K-bit vector to examine.
//   out - number of consecutive 0 bits counted from in[K-1] downwards;
//         0 when in[K-1] is set, K when `in` is all zeros.
//
// Purely combinational. The count is produced for any K, not only the
// default 10-bit case.
module lead_zeros_enc #(K=10)(in, out);
    // The result ranges over 0..K inclusive, so K+1 distinct values must fit.
    // For K=10 this is 4 bits, the same as $clog2(K).
    localparam L = $clog2(K+1);

    input  wire  [K-1:0] in;
    output logic [L-1:0] out;

    always_comb begin
        // Default covers the all-zero input.
        out = L'(K);
        // Scan from LSB to MSB: the last assignment wins, so the result is
        // determined by the most significant set bit.
        for (int i = 0; i < K; i++) begin
            if (in[i]) begin
                out = L'(K - 1 - i);
            end
        end
    end
endmodule : lead_zeros_enc
|
|
|
+
|
|
|
// Pipelined floating-point multiplier (work in progress).
//
// Parameters:
//   N - total operand width: 1 sign bit, M exponent bits, N-M-1 mantissa bits.
//   M - exponent width.
// Ports:
//   input_a, input_b - operands, packed as {sign, exponent, mantissa}.
//   output_z         - result, packed the same way; driven combinationally
//                      from the stage-7 registers.
//   clk              - every stage registers on the rising edge.
//   reset            - declared but not used; no pipeline register is reset.
//
// The data path has eight register stages (0 to 7). Stage 6 is a plain
// pass-through, rounding does not handle mantissa overflow, and there is no
// final pack/overflow stage; the TODO notes below mark those gaps.
module fp_mult #(parameter N=16, M=5)(input_a, input_b, output_z, clk, reset);
    localparam K=N-M-1;        // Size of mantissa
    localparam L=$clog2(K+1);  // Size of shifting representation (counts 0..K)

    input  logic [N-1:0] input_a, input_b;
    input  logic         clk, reset;
    output logic [N-1:0] output_z;

    // ====================
    // Stage 0 store input
    // ====================

    reg [M-1:0] a_e0, b_e0;
    reg [K-1:0] a_m0, b_m0;
    reg         a_s0, b_s0;

    always_ff @(posedge clk) begin
        {a_s0, a_e0, a_m0} <= input_a;
        {b_s0, b_e0, b_m0} <= input_b;
    end

    // ====================
    // Stage 1 denormalise0
    // ====================

    reg [M-1:0] a_e1, b_e1;
    reg [K-1:0] a_m1, b_m1;
    reg         z_s1;

    // leading zeros
    wire [L-1:0] a_z1w, b_z1w;
    reg  [L-1:0] a_z1, b_z1;

    // The encoders read the stage-0 mantissas so that the registered counts
    // a_z1/b_z1 describe the same operands as a_m1/b_m1 in stage 2.
    lead_zeros_enc #(.K(K)) lz_a1(a_m0, a_z1w);
    lead_zeros_enc #(.K(K)) lz_b1(b_m0, b_z1w);

    always_ff @(posedge clk) begin
        // Only the product sign is carried forward; the operand signs are
        // not needed after this point.
        z_s1 <= a_s0 ^ b_s0;
        {a_e1, a_m1} <= {a_e0, a_m0};
        {b_e1, b_m1} <= {b_e0, b_m0};
        a_z1 <= a_z1w;
        b_z1 <= b_z1w;
    end

    // ====================
    // Stage 2 denormalise1
    // ====================

    reg [M-1:0] a_e2, b_e2;
    reg [K-1:0] a_m2, b_m2;
    reg         z_s2;

    always_ff @(posedge clk) begin
        // Shift out the leading zeros and compensate in the exponent.
        a_e2 <= a_e1 - a_z1;
        a_m2 <= a_m1 << a_z1;

        b_e2 <= b_e1 - b_z1;
        b_m2 <= b_m1 << b_z1;

        z_s2 <= z_s1;
    end

    // ====================
    // Stage 3 product
    // ====================

    reg            z_s3;
    reg  [M-1:0]   z_e3;
    reg  [K*2-1:0] z_p3;   // product is double mantissa
    wire [K*2-1:0] z_p3w;

    // Operands are extended to the 2K-bit result width, so no product bits
    // are lost.
    assign z_p3w = a_m2 * b_m2;

    always_ff @(posedge clk) begin
        z_e3 <= a_e2 + b_e2 + 1;
        z_p3 <= z_p3w;

        z_s3 <= z_s2;
    end

    // ====================
    // Stage 4 unpack/normalise0
    // ====================

    reg         z_s4;
    reg [M-1:0] z_e4;
    reg [K-1:0] z_m4;
    reg [1:0]   z_bits4;   // {guard, round_bit}
    reg         z_sticky4;

    wire [K-1:0] p3;
    // leading zeros
    wire [L-1:0] z_z4w;
    reg  [L-1:0] z_z4;

    // Upper half of the product becomes the result mantissa.
    assign p3 = z_p3[K*2-1:K];
    lead_zeros_enc #(.K(K)) lz_p3(p3, z_z4w);

    always_ff @(posedge clk) begin
        z_m4      <= p3;
        // Guard is the bit directly below the mantissa, round is the next one.
        z_bits4   <= z_p3[K-1:K-2];
        // Sticky covers only the bits below the round bit.
        z_sticky4 <= z_p3[K-3:0] != 0;
        z_z4      <= z_z4w;

        z_e4 <= z_e3;
        z_s4 <= z_s3;
    end

    // ====================
    // Stage 5 normalise1
    // ====================

    reg         z_s5;
    reg [M-1:0] z_e5;
    reg [K-1:0] z_m5;

    reg [1:0]   z_bits5;   // {guard, round_bit}
    reg         z_sticky5;

    always_ff @(posedge clk) begin
        z_e5 <= z_e4 - z_z4;
        z_m5 <= z_m4 << z_z4;

        // The left shift vacates low mantissa bits; refill them from the
        // guard/round bits. These later non-blocking assignments override
        // the matching bits of the full-width z_m5 assignment above.
        case (z_z4)
            'd0: begin
                z_bits5 <= z_bits4;
            end
            'd1: begin
                // Guard moves into the mantissa LSB, round becomes the guard.
                z_m5[0] <= z_bits4[1];
                z_bits5 <= {z_bits4[0], 1'b0};
            end
            'd2: begin
                z_m5[1-:2] <= z_bits4;
                z_bits5    <= 2'b00;
            end
            default : begin
                z_m5[z_z4-1-:2] <= z_bits4;
                z_bits5         <= 2'b00;
            end
        endcase

        z_sticky5 <= z_sticky4;
        z_s5      <= z_s4;
    end

    // ====================
    // Stage 6 normalise2
    // ====================
    reg         z_s6;
    reg [M-1:0] z_e6;
    reg [K-1:0] z_m6;

    reg [1:0]   z_bits6;
    reg         z_sticky6;

    always_ff @(posedge clk) begin
        // TODO: Fix this stage
        // if ($signed(z_e5) < -126) begin
        //     shift6 <= $signed(z_e5) -126;
        // end else begin
        //     shift6 <= 0;
        // end

        // Until the TODO above is resolved this stage only delays the data.
        z_s6      <= z_s5;
        z_e6      <= z_e5;
        z_m6      <= z_m5;
        z_bits6   <= z_bits5;
        z_sticky6 <= z_sticky5;
    end

    // ====================
    // Stage 7 round
    // ====================
    reg         z_s7;
    reg [M-1:0] z_e7;
    reg [K-1:0] z_m7;

    always_ff @(posedge clk) begin
        // Round to nearest, ties to even: round up when the guard bit is set
        // and either a lower bit is set or the mantissa LSB is odd.
        if (z_bits6[1] && (z_bits6[0] | z_sticky6 | z_m6[0])) begin
            z_m7 <= z_m6 + 1;
            // TODO: Check for all 1s
        end else begin
            z_m7 <= z_m6;
        end
        z_e7 <= z_e6;
        z_s7 <= z_s6;
    end

    // ====================
    // Stage 8 pack
    // ====================
    // This stage is skipped as it checks for overflow

    always_comb output_z = {z_s7, z_e7, z_m7};

endmodule : fp_mult
|