fp_mult.sv 6.6 KB


  1. `include "lead_zero.sv"
  // lead_zeros_enc: combinational leading-zero counter.
  //
  // Parameters:
  //   K   - width of the input vector (default 10).
  // Ports:
  //   in  - K-bit vector to examine.
  //   out - number of consecutive 0 bits starting at in[K-1]:
  //         0 when in[K-1] is set, K when the whole vector is zero.
  //         The port is L = $clog2(K)+1 bits wide, which always holds K.
  //
  // Works for any K >= 1; for K = 10 and K = 23 the result is identical to
  // the previous hand-written casez tables.
  module lead_zeros_enc #(K=10)(in, out);
    localparam L = $clog2(K) + 1;

    input  wire  [K-1:0] in;
    output logic [L-1:0] out;

    always_comb begin
      // Default covers the all-zero input and guarantees `out` is driven on
      // every path, so no latch is inferred.
      out = L'(K);
      // Scan from LSB to MSB: a later (more significant) set bit overwrites
      // the earlier result, so the most significant 1 decides the count.
      for (int i = 0; i < K; i++) begin
        if (in[i]) begin
          out = L'(K - 1 - i);
        end
      end
    end
  endmodule : lead_zeros_enc
  // fp_mult: pipelined floating-point multiplier (work in progress).
  //
  // Parameters:
  //   N - total operand width in bits (default 16).
  //   M - exponent field width in bits (default 5).
  // Derived:
  //   K = N-M-1 mantissa bits (must be >= 3 for the sticky slice below).
  //   L = width of a leading-zero count for a K-bit value.
  // Ports:
  //   input_a, input_b - operands laid out as {sign, exponent[M], mantissa[K]}.
  //   output_z         - result in the same layout, driven from the stage-7
  //                      registers, i.e. eight register stages after the
  //                      operands are sampled.
  //   clk              - all stages register on the rising edge.
  //   reset            - declared for the interface but not used by any stage.
  //
  // Known gaps kept from the original design: stage 6 is a pass-through
  // (see TODO), rounding does not handle mantissa wrap-around (see TODO),
  // and the pack stage performs no overflow check.
  module fp_mult #(parameter N=16, M=5)(input_a, input_b, output_z, clk, reset);
    localparam K = N-M-1;            // Size of mantissa
    // Size of a shift amount / leading-zero count. This matches the output
    // port width of lead_zeros_enc ($clog2(K)+1) so the value K itself fits.
    localparam L = $clog2(K) + 1;

    input  logic [N-1:0] input_a, input_b;
    input  logic         clk, reset;
    output logic [N-1:0] output_z;

    // ====================
    // Stage 0 store input
    // ====================
    reg [M-1:0] a_e0, b_e0;
    reg [K-1:0] a_m0, b_m0;
    reg         a_s0, b_s0;

    always_ff @(posedge clk) begin
      {a_s0, a_e0, a_m0} <= input_a;
      {b_s0, b_e0, b_m0} <= input_b;
    end

    // ====================
    // Stage 1 denormalise0
    // ====================
    reg [M-1:0] a_e1, b_e1;
    reg [K-1:0] a_m1, b_m1;
    reg         z_s1;
    // leading zeros
    wire [L-1:0] a_z1w, b_z1w;
    reg  [L-1:0] a_z1, b_z1;

    // The encoders look at the stage-0 mantissas so that the registered
    // counts (a_z1/b_z1) describe the same operands as a_m1/b_m1, which are
    // captured on the same clock edge. Encoding a_m1/b_m1 instead would make
    // the counts lag the mantissas by one cycle.
    lead_zeros_enc #(.K(K)) lz_a1(a_m0, a_z1w);
    lead_zeros_enc #(.K(K)) lz_b1(b_m0, b_z1w);

    always_ff @(posedge clk) begin
      // Only the product sign is carried forward; the operand signs are not
      // needed after this point.
      z_s1 <= a_s0 ^ b_s0;
      {a_e1, a_m1} <= {a_e0, a_m0};
      {b_e1, b_m1} <= {b_e0, b_m0};
      a_z1 <= a_z1w;
      b_z1 <= b_z1w;
    end

    // ====================
    // Stage 2 denormalise1
    // ====================
    reg [M-1:0] a_e2, b_e2;
    reg [K-1:0] a_m2, b_m2;
    reg         z_s2;

    always_ff @(posedge clk) begin
      // Shift out the leading zeros and compensate in the exponent.
      a_e2 <= a_e1 - a_z1;
      a_m2 <= a_m1 << a_z1;
      b_e2 <= b_e1 - b_z1;
      b_m2 <= b_m1 << b_z1;
      z_s2 <= z_s1;
    end

    // ====================
    // Stage 3 product
    // ====================
    reg           z_s3;
    reg [M-1:0]   z_e3;
    reg [K*2-1:0] z_p3; // product is double mantissa

    always_ff @(posedge clk) begin
      z_e3 <= a_e2 + b_e2 + 1;
      z_p3 <= a_m2 * b_m2;
      z_s3 <= z_s2;
    end

    // ====================
    // Stage 4 unpack/normalise0
    // ====================
    reg         z_s4;
    reg [M-1:0] z_e4;
    reg [K-1:0] z_m4;
    reg [1:0]   z_bits4;   // {guard, round_bit}
    reg         z_sticky4;
    wire [K-1:0] p3;
    // leading zeros
    wire [L-1:0] z_z4w;
    reg  [L-1:0] z_z4;

    assign p3 = z_p3[K*2-1:K];          // upper half of the product
    lead_zeros_enc #(.K(K)) lz_p3(p3, z_z4w);

    always_ff @(posedge clk) begin
      z_m4      <= p3;
      // The two product bits directly below the kept mantissa:
      // z_bits4[1] = guard (bit K-1), z_bits4[0] = round (bit K-2).
      z_bits4   <= z_p3[K-1:K-2];
      // Sticky is the OR of everything below the round bit. It must not
      // include the round bit itself, otherwise a shifted-up round bit is
      // counted twice when rounding.
      z_sticky4 <= z_p3[K-3:0] != 0;
      z_z4      <= z_z4w;
      z_e4      <= z_e3;
      z_s4      <= z_s3;
    end

    // ====================
    // Stage 5 normalise1
    // ====================
    reg         z_s5;
    reg [M-1:0] z_e5;
    reg [K-1:0] z_m5;
    reg [1:0]   z_bits5;   // {guard, round_bit}
    reg         z_sticky5;

    // Mantissa with guard/round appended, shifted left by the leading-zero
    // count. Guard and round move up into the vacated mantissa LSBs in
    // product order (guard first), and zeros fill in behind them.
    wire [K+1:0] z_ext4;
    assign z_ext4 = {z_m4, z_bits4} << z_z4;

    always_ff @(posedge clk) begin
      z_e5            <= z_e4 - z_z4;
      {z_m5, z_bits5} <= z_ext4;
      z_sticky5       <= z_sticky4;
      z_s5            <= z_s4;
    end

    // ====================
    // Stage 6 normalise2
    // ====================
    reg         z_s6;
    reg [M-1:0] z_e6;
    reg [K-1:0] z_m6;
    reg [1:0]   z_bits6;   // {guard, round_bit}
    reg         z_sticky6;

    always_ff @(posedge clk) begin
      // TODO: Fix this stage
      // if ($signed(z_e5) < -126) begin
      // shift6 <= $signed(z_e5) -126;
      // end else begin
      // shift6 <= 0;
      // end
      // Currently a plain pipeline register: values pass through unchanged.
      z_s6      <= z_s5;
      z_e6      <= z_e5;
      z_m6      <= z_m5;
      z_bits6   <= z_bits5;
      z_sticky6 <= z_sticky5;
    end

    // ====================
    // Stage 7 round
    // ====================
    reg         z_s7;
    reg [M-1:0] z_e7;
    reg [K-1:0] z_m7;

    always_ff @(posedge clk) begin
      // Round to nearest, ties to even: round up when the guard bit is set
      // and either something lower is set (round/sticky) or the mantissa
      // LSB is odd.
      if (z_bits6[1] && (z_bits6[0] | z_sticky6 | z_m6[0])) begin
        z_m7 <= z_m6 + 1;
        // TODO: Check for all 1s (the increment wraps to zero and the
        // exponent is not adjusted).
      end else begin
        z_m7 <= z_m6;
      end
      z_e7 <= z_e6;
      z_s7 <= z_s6;
    end

    // ====================
    // Stage 8 pack
    // ====================
    // No overflow checking is performed here; the stage-7 fields are simply
    // concatenated into the output word.
    always_comb output_z = {z_s7, z_e7, z_m7};
  endmodule : fp_mult