пре 5 година · ff722bedc5
--- a/altera_devel.qsf
+++ b/altera_devel.qsf
@@ -85,7 +85,9 @@ set_global_assignment -name EDA_TEST_BENCH_ENABLE_STATUS TEST_BENCH_MODE -sectio
 
				 set_global_assignment -name EDA_NATIVELINK_SIMULATION_TEST_BENCH root_tb -section_id eda_simulation
			
 
				 set_global_assignment -name EDA_TEST_BENCH_NAME root_tb -section_id eda_simulation
			
 
				 set_global_assignment -name EDA_DESIGN_INSTANCE_NAME NA -section_id root_tb
			
 
				-set_global_assignment -name EDA_TEST_BENCH_MODULE_NAME root_tb -section_id root_tb
			
 
				-set_global_assignment -name EDA_TEST_BENCH_FILE src/root.sv -section_id root_tb
			
 
				+# Simulate the FPU test bench module (fpu32_tb) instead of root_tb.
				+set_global_assignment -name EDA_TEST_BENCH_MODULE_NAME fpu32_tb -section_id root_tb
			
 
				 set_global_assignment -name SOURCE_FILE db/altera_devel.cmp.rdb
			
 
				+# Files compiled for the test bench section: the top level plus the FPU sources.
				+set_global_assignment -name EDA_TEST_BENCH_FILE src/root.sv -section_id root_tb
			
 
				+set_global_assignment -name EDA_TEST_BENCH_FILE src/fpu32/fpu32.sv -section_id root_tb
			
 
				+# NOTE(review): presumably the vendor adder IP kept for comparison - confirm it is still needed.
				+set_global_assignment -name QIP_FILE altera/fpu32_adder.qip
			
 
				 set_instance_assignment -name PARTITION_HIERARCHY root_partition -to | -section_id Top
			
--- a/scripts/fpu_test_gen.py
+++ b/scripts/fpu_test_gen.py
@@ -0,0 +1,32 @@
 
				+import numpy as np
			
 
				+import os
			
 
				+
			
 
				+def generate_fp16_tb(cases, filename, dtype=np.float16):
				+    """Write random floating-point add/multiply test vectors as hex words.
				+
				+    Each output line holds four space-separated hex words: operand ``x``,
				+    operand ``y``, ``x + y`` and ``x * y``. Every word is the IEEE bit
				+    pattern printed most-significant digit first, which is how
				+    ``$readmemh`` parses a number. (Dumping the raw bytes with
				+    ``bytes.hex()`` follows host memory order and comes out byte-reversed
				+    on little-endian machines.)
				+
				+    The operands are random bit patterns from ``os.urandom``, so NaN,
				+    infinity and denormal values are all possible.
				+
				+    Args:
				+        cases: Number of test vectors (lines) to generate.
				+        filename: Path of the text file to create or overwrite.
				+        dtype: ``np.float16`` or ``np.float32``.
				+
				+    Raises:
				+        ValueError: If ``dtype`` is neither float16 nor float32.
				+    """
				+    # Unsigned integer type of the same width, used to read back bit patterns.
				+    if dtype == np.float16:
				+        word = np.uint16
				+    elif dtype == np.float32:
				+        word = np.uint32
				+    else:
				+        raise ValueError(f"Unknown dtype {dtype}")
				+    dsize = np.dtype(word).itemsize
				+    digits = 2 * dsize
				+
				+    x = np.frombuffer(os.urandom(cases * dsize), dtype=dtype)
				+    y = np.frombuffer(os.urandom(cases * dsize), dtype=dtype)
				+    # Random operands routinely overflow or involve NaN; that is expected
				+    # here, so keep NumPy from warning about it.
				+    with np.errstate(all='ignore'):
				+        total = x + y
				+        product = x * y
				+
				+    columns = [values.view(word) for values in (x, y, total, product)]
				+    with open(filename, 'w') as f:
				+        for row in zip(*columns):
				+            f.write(' '.join(f"{int(v):0{digits}x}" for v in row) + '\n')
			
 
				+
			
 
				+
			
 
				+# Emit 30 random float32 cases into fp32_test.hex in the current directory.
				+# The fpu32_tb test bench loads ../../scripts/fp32_test.hex into a 30-row
				+# memory, so this is presumably meant to be run from scripts/ - confirm.
				+if __name__ == '__main__':
			
 
				+    generate_fp16_tb(30, 'fp32_test.hex', dtype=np.float32)
			
--- a/simulation/modelsim/sim_fpu32.do
+++ b/simulation/modelsim/sim_fpu32.do
@@ -0,0 +1,20 @@
 
				+#transcript on
				+# ModelSim script: compile the FPU test bench and open it with the adder signals on the wave view.
				+# Start from a clean work library so stale compiled units cannot leak into this run.
				+if {[file exists rtl_work]} {
				+	vdel -lib rtl_work -all
				+}
				+# The repository root is taken to be two levels above the directory the script is run from.
				+set rootdir [pwd]/../..
				+puts "Root Directory $rootdir"
				+vlib rtl_work
				+vmap work rtl_work
				+
				+# fpu32.sv pulls in "adder.v" and "mult.v" by bare file name, so the directory
				+# that holds fpu32.sv has to be on the include path as well as src itself.
				+vlog -sv -work work +incdir+${rootdir}/src +incdir+${rootdir}/src/fpu32 ${rootdir}/src/fpu32/fpu32.sv
				+vsim -t 1ps -L altera_ver -L lpm_ver -L sgate_ver -L altera_mf_ver -L altera_lnsim_ver -L cycloneive_ver -L rtl_work -L work -voptargs="+acc" fpu32_tb
				+
				+view structure
				+view signals
				+
				+# Operands and result are shown as float32 so values can be read directly.
				+add wave -noupdate -label CLK /fpu32_tb/clk
				+add wave -noupdate -label RESET /fpu32_tb/reset
				+add wave -noupdate -label INPUT_A -radix float32 /fpu32_tb/input_a
				+add wave -noupdate -label INPUT_B -radix float32 /fpu32_tb/input_b
				+add wave -noupdate -label RESULT_ADDER -radix float32 /fpu32_tb/result_add
			
--- a/src/fpu32/adder.v
+++ b/src/fpu32/adder.v
@@ -0,0 +1,305 @@
 
				+//IEEE Floating Point Adder (Single Precision)
			
 
				+//Copyright (C) Jonathan P Dawson 2013
			
 
				+//2013-12-12
			
 
				+
			
 
				+// Sequential single-precision adder with strobe/acknowledge handshakes.
				+//
				+// Inputs are accepted when a stb is high while the matching ack is high;
				+// the result is offered on output_z with output_z_stb high until
				+// output_z_ack is seen. One addition walks through the states
				+// unpack -> special_cases -> align -> add_0 -> add_1 -> normalise_1 ->
				+// normalise_2 -> round -> pack -> put_z; align and normalise take one clock
				+// per bit shifted. rst is synchronous and sends the machine to get_input.
				+module adder(
				+        input_a,
				+        input_b,
				+        input_a_stb,
				+        input_b_stb,
				+        output_z_ack,
				+        clk,
				+        rst,
				+        output_z,
				+        output_z_stb,
				+        input_a_ack,
				+        input_b_ack);
				+
				+  input     clk;
				+  input     rst;
				+
				+  input     [31:0] input_a;
				+  input     input_a_stb;
				+  output    input_a_ack;
				+
				+  input     [31:0] input_b;
				+  input     input_b_stb;
				+  output    input_b_ack;
				+
				+  output    [31:0] output_z;
				+  output    output_z_stb;
				+  input     output_z_ack;
				+
				+  reg       s_output_z_stb;
				+  reg       [31:0] s_output_z;
				+  reg       s_input_a_ack;
				+  reg       s_input_b_ack;
				+
				+  reg       [3:0] state;
				+  parameter get_a         = 4'd0,
				+            get_b         = 4'd1,
				+            unpack        = 4'd2,
				+            special_cases = 4'd3,
				+            align         = 4'd4,
				+            add_0         = 4'd5,
				+            add_1         = 4'd6,
				+            normalise_1   = 4'd7,
				+            normalise_2   = 4'd8,
				+            round         = 4'd9,
				+            pack          = 4'd10,
				+            put_z         = 4'd11,
				+            get_input     = 4'd12;
				+
				+  reg       [31:0] a, b, z;
				+  // mantissas carry the hidden bit in [26] and three guard/round/sticky bits in [2:0]
				+  reg       [26:0] a_m, b_m;
				+  reg       [23:0] z_m;
				+  // exponents are unbiased two's complement values
				+  reg       [9:0] a_e, b_e, z_e;
				+  reg       a_s, b_s, z_s;
				+  reg       guard, round_bit, sticky;
				+  reg       [27:0] sum;
				+
				+  always @(posedge clk)
				+  begin
				+
				+    case(state)
				+
				+      //take operand a on its own, then move on to operand b
				+      get_a:
				+      begin
				+        s_input_a_ack <= 1;
				+        if (s_input_a_ack && input_a_stb) begin
				+          a <= input_a;
				+          s_input_a_ack <= 0;
				+          state <= get_b;
				+        end
				+      end
				+
				+      //take both operands in the same clock (state entered after reset and after every result)
				+      get_input:
				+      begin
				+        s_input_a_ack <= 1;
				+        s_input_b_ack <= 1;
				+        if (s_input_a_ack && input_a_stb && s_input_b_ack && input_b_stb) begin
				+          a <= input_a;
				+          b <= input_b;
				+          s_input_a_ack <= 0;
				+          s_input_b_ack <= 0;
				+          state <= unpack;
				+        end
				+      end
				+
				+      get_b:
				+      begin
				+        s_input_b_ack <= 1;
				+        if (s_input_b_ack && input_b_stb) begin
				+          b <= input_b;
				+          s_input_b_ack <= 0;
				+          state <= unpack;
				+        end
				+      end
				+
				+      //split into sign, unbiased exponent and mantissa (hidden bit added later)
				+      unpack:
				+      begin
				+        a_m <= {a[22 : 0], 3'd0};
				+        b_m <= {b[22 : 0], 3'd0};
				+        a_e <= a[30 : 23] - 127;
				+        b_e <= b[30 : 23] - 127;
				+        a_s <= a[31];
				+        b_s <= b[31];
				+        state <= special_cases;
				+      end
				+
				+      special_cases:
				+      begin
				+        //if a is NaN or b is NaN return NaN
				+        if ((a_e == 128 && a_m != 0) || (b_e == 128 && b_m != 0)) begin
				+          z[31] <= 1;
				+          z[30:23] <= 255;
				+          z[22] <= 1;
				+          z[21:0] <= 0;
				+          state <= put_z;
				+        //if a is inf return inf
				+        end else if (a_e == 128) begin
				+          z[31] <= a_s;
				+          z[30:23] <= 255;
				+          z[22:0] <= 0;
				+          //if a is inf and signs don't match return nan
				+          if ((b_e == 128) && (a_s != b_s)) begin
				+              z[31] <= b_s;
				+              z[30:23] <= 255;
				+              z[22] <= 1;
				+              z[21:0] <= 0;
				+          end
				+          state <= put_z;
				+        //if b is inf return inf
				+        end else if (b_e == 128) begin
				+          z[31] <= b_s;
				+          z[30:23] <= 255;
				+          z[22:0] <= 0;
				+          state <= put_z;
				+        //if a and b are both zero return zero, negative only when both are negative
				+        end else if ((($signed(a_e) == -127) && (a_m == 0)) && (($signed(b_e) == -127) && (b_m == 0))) begin
				+          z[31] <= a_s & b_s;
				+          z[30:23] <= b_e[7:0] + 127;
				+          z[22:0] <= b_m[26:3];
				+          state <= put_z;
				+        //if a is zero return b
				+        end else if (($signed(a_e) == -127) && (a_m == 0)) begin
				+          z[31] <= b_s;
				+          z[30:23] <= b_e[7:0] + 127;
				+          z[22:0] <= b_m[26:3];
				+          state <= put_z;
				+        //if b is zero return a
				+        end else if (($signed(b_e) == -127) && (b_m == 0)) begin
				+          z[31] <= a_s;
				+          z[30:23] <= a_e[7:0] + 127;
				+          z[22:0] <= a_m[26:3];
				+          state <= put_z;
				+        end else begin
				+          //Denormalised Number
				+          if ($signed(a_e) == -127) begin
				+            a_e <= -126;
				+          end else begin
				+            a_m[26] <= 1;
				+          end
				+          //Denormalised Number
				+          if ($signed(b_e) == -127) begin
				+            b_e <= -126;
				+          end else begin
				+            b_m[26] <= 1;
				+          end
				+          state <= align;
				+        end
				+      end
				+
				+      //shift the operand with the smaller exponent right one bit per clock,
				+      //keeping every bit that falls off OR-ed into bit 0 (sticky)
				+      align:
				+      begin
				+        if ($signed(a_e) > $signed(b_e)) begin
				+          b_e <= b_e + 1;
				+          b_m <= b_m >> 1;
				+          b_m[0] <= b_m[0] | b_m[1];
				+        end else if ($signed(a_e) < $signed(b_e)) begin
				+          a_e <= a_e + 1;
				+          a_m <= a_m >> 1;
				+          a_m[0] <= a_m[0] | a_m[1];
				+        end else begin
				+          state <= add_0;
				+        end
				+      end
				+
				+      //add magnitudes for equal signs, otherwise subtract the smaller from the larger
				+      add_0:
				+      begin
				+        z_e <= a_e;
				+        if (a_s == b_s) begin
				+          sum <= a_m + b_m;
				+          z_s <= a_s;
				+        end else begin
				+          if (a_m >= b_m) begin
				+            sum <= a_m - b_m;
				+            z_s <= a_s;
				+          end else begin
				+            sum <= b_m - a_m;
				+            z_s <= b_s;
				+          end
				+        end
				+        state <= add_1;
				+      end
				+
				+      //on carry out take the upper bits and bump the exponent
				+      add_1:
				+      begin
				+        if (sum[27]) begin
				+          z_m <= sum[27:4];
				+          guard <= sum[3];
				+          round_bit <= sum[2];
				+          sticky <= sum[1] | sum[0];
				+          z_e <= z_e + 1;
				+        end else begin
				+          z_m <= sum[26:3];
				+          guard <= sum[2];
				+          round_bit <= sum[1];
				+          sticky <= sum[0];
				+        end
				+        state <= normalise_1;
				+      end
				+
				+      //shift left until the hidden bit is set or the exponent reaches -126
				+      normalise_1:
				+      begin
				+        if (z_m[23] == 0 && $signed(z_e) > -126) begin
				+          z_e <= z_e - 1;
				+          z_m <= z_m << 1;
				+          z_m[0] <= guard;
				+          guard <= round_bit;
				+          round_bit <= 0;
				+        end else begin
				+          state <= normalise_2;
				+        end
				+      end
				+
				+      //shift right while the exponent is below -126
				+      normalise_2:
				+      begin
				+        if ($signed(z_e) < -126) begin
				+          z_e <= z_e + 1;
				+          z_m <= z_m >> 1;
				+          guard <= z_m[0];
				+          round_bit <= guard;
				+          sticky <= sticky | round_bit;
				+        end else begin
				+          state <= round;
				+        end
				+      end
				+
				+      //round to nearest, ties to even
				+      round:
				+      begin
				+        if (guard && (round_bit | sticky | z_m[0])) begin
				+          z_m <= z_m + 1;
				+          if (z_m == 24'hffffff) begin
				+            z_e <=z_e + 1;
				+          end
				+        end
				+        state <= pack;
				+      end
				+
				+      //later assignments in this state deliberately override the earlier ones
				+      pack:
				+      begin
				+        z[22 : 0] <= z_m[22:0];
				+        z[30 : 23] <= z_e[7:0] + 127;
				+        z[31] <= z_s;
				+        if ($signed(z_e) == -126 && z_m[23] == 0) begin
				+          z[30 : 23] <= 0;
				+        end
				+        if ($signed(z_e) == -126 && z_m[23:0] == 24'h0) begin
				+          z[31] <= 1'b0; // FIX SIGN BUG: -a + a = +0.
				+        end
				+        //if overflow occurs, return inf
				+        if ($signed(z_e) > 127) begin
				+          z[22 : 0] <= 0;
				+          z[30 : 23] <= 255;
				+          z[31] <= z_s;
				+        end
				+        state <= put_z;
				+      end
				+
				+      //hold the result until the consumer acknowledges it
				+      put_z:
				+      begin
				+        s_output_z_stb <= 1;
				+        s_output_z <= z;
				+        if (s_output_z_stb && output_z_ack) begin
				+          s_output_z_stb <= 0;
				+          state <= get_input;
				+        end
				+      end
				+
				+      //state is 4 bits wide but only 13 encodings are used: recover from an
				+      //illegal encoding the same way reset does instead of hanging forever
				+      default:
				+      begin
				+        state <= get_input;
				+        s_input_a_ack <= 0;
				+        s_input_b_ack <= 0;
				+        s_output_z_stb <= 0;
				+      end
				+
				+    endcase
				+
				+    //synchronous reset, written last so it wins over the case above
				+    if (rst == 1) begin
				+      state <= get_input;
				+      s_input_a_ack <= 0;
				+      s_input_b_ack <= 0;
				+      s_output_z_stb <= 0;
				+    end
				+
				+  end
				+  assign input_a_ack = s_input_a_ack;
				+  assign input_b_ack = s_input_b_ack;
				+  assign output_z_stb = s_output_z_stb;
				+  assign output_z = s_output_z;
				+
				+endmodule
			
--- a/src/fpu32/adder_piped.v
+++ b/src/fpu32/adder_piped.v
@@ -0,0 +1,177 @@
 
				+
			
 
				+
			
 
				+// Pipelined single-precision adder: a new operand pair is sampled on every
				+// clock and the matching result appears on output_z eight rising edges later.
				+// Stage order: unpack, special cases, align, add, carry adjust, normalise,
				+// round, pack. Results for NaN / infinity / zero operands are decided in
				+// stage 1 and carried alongside the arithmetic path by the sN_z / sN_sp
				+// registers.
				+// reset is part of the port list but unused: the pipeline holds no control
				+// state and flushes itself in eight clocks.
				+module adder_piped(
				+    input wire reset, clk,
				+    input [31:0] input_a, input_b,
				+    output [31:0] output_z
				+);
				+
				+// Shift a mantissa right by amount, OR-ing every bit shifted out into bit 0 (sticky)
				+function [26:0] shift_right_sticky;
				+    input [26:0] mantissa;
				+    input [9:0] amount;
				+    reg [26:0] lost_mask;
				+    begin
				+        // for amount >= 27 the shifted 1 drops off the top and the mask wraps to all ones
				+        lost_mask = (27'd1 << amount) - 27'd1;
				+        shift_right_sticky = (mantissa >> amount) | {26'd0, |(mantissa & lost_mask)};
				+    end
				+endfunction
				+
				+// Number of zero bits above the most significant set bit (26 when value is zero)
				+function [4:0] leading_zeros;
				+    input [25:0] value;
				+    integer i;
				+    begin
				+        leading_zeros = 5'd26;
				+        // scanning upwards lets the highest set bit write last and win
				+        for (i = 0; i < 26; i = i + 1) begin
				+            if (value[i]) begin
				+                leading_zeros = 25 - i;
				+            end
				+        end
				+    end
				+endfunction
				+
				+// ========================
				+// Stage 0: unpack
				+// ========================
				+reg [26:0] s0_a_m, s0_b_m;  // Mantissa (fraction plus three guard bits)
				+reg [9:0] s0_a_e, s0_b_e;   // Exponent (unbiased, two's complement)
				+reg s0_a_s, s0_b_s;         // Sign
				+
				+always @(posedge clk) begin
				+    s0_a_m <= {input_a[22 : 0], 3'd0};
				+    s0_b_m <= {input_b[22 : 0], 3'd0};
				+    s0_a_e <= input_a[30 : 23] - 127;
				+    s0_b_e <= input_b[30 : 23] - 127;
				+    s0_a_s <= input_a[31];
				+    s0_b_s <= input_b[31];
				+end
				+
				+// ========================
				+// Stage 1: special cases, hidden bit
				+// ========================
				+reg [26:0] s1_a_m, s1_b_m;  // Mantissa
				+reg [9:0] s1_a_e, s1_b_e;   // Exponent
				+reg s1_a_s, s1_b_s;         // Sign
				+reg [31:0] s1_z;            // Result when s1_sp is set
				+reg s1_sp;                  // Special case flag
				+
				+always @(posedge clk) begin
				+    //if a is NaN or b is NaN return NaN
				+    if ((s0_a_e == 128 && s0_a_m != 0) || (s0_b_e == 128 && s0_b_m != 0)) begin
				+        s1_z[31] <= 1;
				+        s1_z[30:23] <= 255;
				+        s1_z[22] <= 1;
				+        s1_z[21:0] <= 0;
				+        s1_sp <= 1;
				+    //if a is inf return inf
				+    end else if (s0_a_e == 128) begin
				+        s1_z[31] <= s0_a_s;
				+        s1_z[30:23] <= 255;
				+        s1_z[22:0] <= 0;
				+        //if a is inf and signs don't match return nan
				+        if ((s0_b_e == 128) && (s0_a_s != s0_b_s)) begin
				+            s1_z[31] <= s0_b_s;
				+            s1_z[30:23] <= 255;
				+            s1_z[22] <= 1;
				+            s1_z[21:0] <= 0;
				+        end
				+        s1_sp <= 1;
				+    //if b is inf return inf
				+    end else if (s0_b_e == 128) begin
				+        s1_z[31] <= s0_b_s;
				+        s1_z[30:23] <= 255;
				+        s1_z[22:0] <= 0;
				+        s1_sp <= 1;
				+    //if a and b are both zero return zero, negative only when both are negative
				+    end else if ((($signed(s0_a_e) == -127) && (s0_a_m == 0)) && (($signed(s0_b_e) == -127) && (s0_b_m == 0))) begin
				+        s1_z[31] <= s0_a_s & s0_b_s;
				+        s1_z[30:23] <= s0_b_e[7:0] + 127;
				+        s1_z[22:0] <= s0_b_m[25:3];
				+        s1_sp <= 1;
				+    //if a is zero return b
				+    end else if (($signed(s0_a_e) == -127) && (s0_a_m == 0)) begin
				+        s1_z[31] <= s0_b_s;
				+        s1_z[30:23] <= s0_b_e[7:0] + 127;
				+        s1_z[22:0] <= s0_b_m[25:3];
				+        s1_sp <= 1;
				+    //if b is zero return a
				+    end else if (($signed(s0_b_e) == -127) && (s0_b_m == 0)) begin
				+        s1_z[31] <= s0_a_s;
				+        s1_z[30:23] <= s0_a_e[7:0] + 127;
				+        s1_z[22:0] <= s0_a_m[25:3];
				+        s1_sp <= 1;
				+    end else begin
				+        //Denormalised Number: no hidden bit, exponent pinned to -126
				+        if ($signed(s0_a_e) == -127) begin
				+            s1_a_e <= -126;
				+            s1_a_m <= s0_a_m;
				+        end else begin
				+            s1_a_e <= s0_a_e;
				+            s1_a_m <= {1'b1, s0_a_m[25:0]};
				+        end
				+        //Denormalised Number
				+        if ($signed(s0_b_e) == -127) begin
				+            s1_b_e <= -126;
				+            s1_b_m <= s0_b_m;
				+        end else begin
				+            s1_b_e <= s0_b_e;
				+            s1_b_m <= {1'b1, s0_b_m[25:0]};
				+        end
				+        s1_a_s <= s0_a_s;
				+        s1_b_s <= s0_b_s;
				+        s1_sp <= 0;
				+    end
				+end
				+
				+// ========================
				+// Stage 2: align to the larger exponent
				+// ========================
				+reg [26:0] s2_a_m, s2_b_m;  // Mantissa
				+reg [9:0] s2_e;             // Exponent shared by both operands after alignment
				+reg s2_a_s, s2_b_s;         // Sign
				+reg [31:0] s2_z;
				+reg s2_sp;  // Special case flag
				+
				+// Exponent distance in each direction; only the positive one is used
				+wire [9:0] s1_a_minus_b = s1_a_e - s1_b_e;
				+wire [9:0] s1_b_minus_a = s1_b_e - s1_a_e;
				+
				+always @(posedge clk) begin
				+    s2_z <= s1_z;
				+    s2_sp <= s1_sp;
				+    s2_a_s <= s1_a_s;
				+    s2_b_s <= s1_b_s;
				+    if ($signed(s1_a_e) > $signed(s1_b_e)) begin
				+        s2_e <= s1_a_e;
				+        s2_a_m <= s1_a_m;
				+        s2_b_m <= shift_right_sticky(s1_b_m, s1_a_minus_b);
				+    end else if ($signed(s1_a_e) < $signed(s1_b_e)) begin
				+        s2_e <= s1_b_e;
				+        s2_a_m <= shift_right_sticky(s1_a_m, s1_b_minus_a);
				+        s2_b_m <= s1_b_m;
				+    end else begin
				+        s2_e <= s1_a_e;
				+        s2_a_m <= s1_a_m;
				+        s2_b_m <= s1_b_m;
				+    end
				+end
				+
				+// ========================
				+// Stage 3: add or subtract magnitudes
				+// ========================
				+reg [27:0] s3_sum;          // One bit wider than the mantissas to keep the carry
				+reg [9:0] s3_z_e;
				+reg s3_z_s;
				+reg [31:0] s3_z;
				+reg s3_sp;  // Special case flag
				+
				+always @(posedge clk) begin
				+    s3_z <= s2_z;
				+    s3_sp <= s2_sp;
				+    s3_z_e <= s2_e;
				+    if (s2_a_s == s2_b_s) begin
				+        s3_sum <= s2_a_m + s2_b_m;
				+        s3_z_s <= s2_a_s;
				+    end else begin
				+        if (s2_a_m >= s2_b_m) begin
				+            s3_sum <= s2_a_m - s2_b_m;
				+            s3_z_s <= s2_a_s;
				+        end else begin
				+            s3_sum <= s2_b_m - s2_a_m;
				+            s3_z_s <= s2_b_s;
				+        end
				+    end
				+end
				+
				+// ========================
				+// Stage 4: carry adjust, split off guard / round / sticky
				+// ========================
				+reg [23:0] s4_z_m;
				+reg [9:0] s4_z_e;
				+reg s4_z_s;
				+reg s4_guard, s4_round, s4_sticky;
				+reg [31:0] s4_z;
				+reg s4_sp;  // Special case flag
				+
				+always @(posedge clk) begin
				+    s4_z <= s3_z;
				+    s4_sp <= s3_sp;
				+    s4_z_s <= s3_z_s;
				+    if (s3_sum[27]) begin
				+        s4_z_m <= s3_sum[27:4];
				+        s4_guard <= s3_sum[3];
				+        s4_round <= s3_sum[2];
				+        s4_sticky <= s3_sum[1] | s3_sum[0];
				+        s4_z_e <= s3_z_e + 1;
				+    end else begin
				+        s4_z_m <= s3_sum[26:3];
				+        s4_guard <= s3_sum[2];
				+        s4_round <= s3_sum[1];
				+        s4_sticky <= s3_sum[0];
				+        s4_z_e <= s3_z_e;
				+    end
				+end
				+
				+// ========================
				+// Stage 5: normalise
				+// ========================
				+reg [23:0] s5_z_m;
				+reg [9:0] s5_z_e;
				+reg s5_z_s;
				+reg s5_guard, s5_round, s5_sticky;
				+reg [31:0] s5_z;
				+reg s5_sp;  // Special case flag
				+
				+// Mantissa with guard and round appended, so a left shift pulls them in
				+wire [25:0] s4_ext = {s4_z_m, s4_guard, s4_round};
				+wire [9:0] s4_lz = {5'd0, leading_zeros(s4_ext)};
				+// Left shifts available before the exponent would drop below -126
				+wire [9:0] s4_room = s4_z_e + 10'd126;
				+// An all-zero value can never set the hidden bit, so it runs down to exponent -126
				+wire [9:0] s4_shift = ((s4_ext == 26'd0) || (s4_lz > s4_room)) ? s4_room : s4_lz;
				+wire [25:0] s4_norm = s4_ext << s4_shift;
				+
				+always @(posedge clk) begin
				+    s5_z <= s4_z;
				+    s5_sp <= s4_sp;
				+    s5_z_s <= s4_z_s;
				+    s5_sticky <= s4_sticky;
				+    s5_z_e <= s4_z_e - s4_shift;
				+    s5_z_m <= s4_norm[25:2];
				+    s5_guard <= s4_norm[1];
				+    s5_round <= s4_norm[0];
				+end
				+
				+// ========================
				+// Stage 6: round to nearest, ties to even
				+// ========================
				+reg [23:0] s6_z_m;
				+reg [9:0] s6_z_e;
				+reg s6_z_s;
				+reg [31:0] s6_z;
				+reg s6_sp;  // Special case flag
				+
				+always @(posedge clk) begin
				+    s6_z <= s5_z;
				+    s6_sp <= s5_sp;
				+    s6_z_s <= s5_z_s;
				+    s6_z_m <= s5_z_m;
				+    s6_z_e <= s5_z_e;
				+    if (s5_guard && (s5_round | s5_sticky | s5_z_m[0])) begin
				+        s6_z_m <= s5_z_m + 1;
				+        // mantissa wraps to zero: the value became the next power of two
				+        if (s5_z_m == 24'hffffff) begin
				+            s6_z_e <= s5_z_e + 1;
				+        end
				+    end
				+end
				+
				+// ========================
				+// Stage 7: pack
				+// ========================
				+reg [31:0] s7_z;
				+
				+always @(posedge clk) begin
				+    if (s6_sp) begin
				+        s7_z <= s6_z;
				+    end else begin
				+        // later assignments deliberately override the earlier ones
				+        s7_z[22 : 0] <= s6_z_m[22:0];
				+        s7_z[30 : 23] <= s6_z_e[7:0] + 127;
				+        s7_z[31] <= s6_z_s;
				+        //denormal result: exponent field is zero
				+        if ($signed(s6_z_e) == -126 && s6_z_m[23] == 0) begin
				+            s7_z[30 : 23] <= 0;
				+        end
				+        //exact cancellation gives +0
				+        if ($signed(s6_z_e) == -126 && s6_z_m[23:0] == 24'h0) begin
				+            s7_z[31] <= 1'b0;
				+        end
				+        //if overflow occurs, return inf
				+        if ($signed(s6_z_e) > 127) begin
				+            s7_z[22 : 0] <= 0;
				+            s7_z[30 : 23] <= 255;
				+            s7_z[31] <= s6_z_s;
				+        end
				+    end
				+end
				+
				+assign output_z = s7_z;
				+
				+endmodule
			
--- a/src/fpu32/fpu32.sv
+++ b/src/fpu32/fpu32.sv
@@ -1,3 +1,6 @@
 
				+`include "adder.v"
			
 
				+`include "mult.v"
			
 
				+
			
 
				 // synopsys translate_off
			
 
				 `timescale 1 ps / 1 ps
			
 
				 // synopsys translate_on
			
@@ -7,61 +10,68 @@ module fpu32_tb();
 
				     reg [31:0] input_a, input_b, result_add, result_div, result_mul;
			
 
				     wire nan, overflow, underflow, zero;
			
 
				 
			
 
				-    fpu_add adder(
			
 
				-        .aclr(reset),
			
 
				-        .clock(clk),
			
 
				-        .input_a(input_a),
			
 
				-        .input_b(input_b),
			
 
				-        .result(result_add)
			
 
				-    );
			
 
				-
			
 
				-    fpu_div divider(
			
 
				-        .aclr(reset),
			
 
				-        .clock(clk),
			
 
				-        .input_a(input_a),
			
 
				-        .input_b(input_b),
			
 
				-        .result(result_div)
			
 
				-    );
			
 
				-
			
 
				-    fpu_mul multipler(
			
 
				-        .aclr(reset),
			
 
				-        .clock(clk),
			
 
				-        .dataa(input_a),
			
 
				-        .datab(input_b),
			
 
				-        .result(result_mul)
			
 
				-    );
			
 
				-
			
 
				-    task test_inputs;
			
 
				-        input [31:0] in_a, in_b, expected_add, expected_mul, expected_div;
			
 
				-        input_a = in_a;
			
 
				-        input_b = in_b;
			
 
				-        #10ps;
			
 
				-//        assert(exception_adder == 0);
			
 
				-//        assert(exception_mult == 0);
			
 
				-//        assert(overflow == 0);
			
 
				-//        assert(underflow == 0);
			
 
				-        if(result_add == expected_add)
			
 
				-            $display("PASS: %H + %H = %H", input_a, input_b, expected_add);
			
 
				-        else
			
 
				-            $error("FAIL ADD: a=%H b=%H c=%H, expected c=%H", input_a, input_b, result_add, expected_add);
			
 
				-        if(result_mul == expected_mul)
			
 
				-            $display("PASS: %H * %H = %H", input_a, input_b, expected_mul);
			
 
				-        else
			
 
				-            $error("FAIL MUL: a=%H b=%H c=%H, expected c=%H", input_a, input_b, result_mul, expected_mul);
			
 
				-        if(result_div == expected_div)
			
 
				-            $display("PASS: %H * %H = %H", input_a, input_b, expected_div);
			
 
				-        else
			
 
				-            $error("FAIL DIV: a=%H b=%H c=%H, expected c=%H", input_a, input_b, result_div, expected_div);
			
 
				-        #10ps;
			
 
				-    endtask : test_inputs
			
 
				-
			
 
				-    initial forever #15ps clk = ~clk;
			
 
				+
			
 
				+    // Handshake signals of the adder under test: the bench drives the regs,
				+    // the adder drives the wires.
				+    reg adder_input_a_stb, adder_input_b_stb, adder_output_z_ack;
				+    wire adder_input_a_ack, adder_input_b_ack, adder_output_z_stb;
				+
				+    adder add0(
				+        .clk(clk),
				+        .rst(reset),
				+        .input_a(input_a),
				+        .input_a_stb(adder_input_a_stb),
				+        .input_a_ack(adder_input_a_ack),
				+        .input_b(input_b),
				+        .input_b_stb(adder_input_b_stb),
				+        .input_b_ack(adder_input_b_ack),
				+        .output_z(result_add),
				+        .output_z_ack(adder_output_z_ack),
				+        .output_z_stb(adder_output_z_stb)
				+    );
				+
				+
				+    // Free-running clock: one edge every 5 time units
				+    initial forever #5 clk = ~clk;
				+
				+    // One row per test case. The generator script writes each line as
				+    // "a b a+b a*b"; assuming $readmemh fills every row from index 0 upward,
				+    // column 0/1 are the operands, 2 the expected sum, 3 the expected product.
				+    reg [31:0] test_mem [29:0][3:0];
				+
				+    initial $readmemh("../../scripts/fp32_test.hex", test_mem);
				 
				     initial begin
				         clk = 0;
				         reset = 1;
				-        test_inputs(32'h42480000, 32'hbf800000, 32'h42440000, 32'hc2480000, 32'hc2480000);
				+        adder_input_a_stb = 0;
				+        adder_input_b_stb = 0;
				+        adder_output_z_ack = 0;
				+
				+        #20;
				+        reset = 0;
				+
				+        foreach(test_mem[i]) begin
				+            input_a = test_mem[i][0];
				+            input_b = test_mem[i][1];
				+            adder_input_a_stb = 1;
				+            adder_input_b_stb = 1;
				+
				+            // The adder only latches the operands once both acks are up
				+            wait(adder_input_a_ack & adder_input_b_ack);
				+            #15;
				+            adder_input_a_stb = 0;
				+            adder_input_b_stb = 0;
				+
				+            @(posedge adder_output_z_stb);
				+            // Sample away from the clock edge that updates the result
				+            @(negedge clk);
				+            adder_output_z_ack = 1;
				+            // Compare against the expected sum (column 2), not the product
				+            if(result_add == test_mem[i][2])
				+                $display("PASS: %H + %H = %H", input_a, input_b, result_add);
				+            else
				+                $error("FAIL ADD: %H + %H = %H, expected %H", input_a, input_b, result_add, test_mem[i][2]);
				+
				+            @(negedge adder_output_z_stb);
				+            adder_output_z_ack = 0;
				+            #10;
				+        end
				 
				+//        assert(result_add == 32'h42440000);
				+//        assert(result_mul == 32'hc2480000);
				+//        $finish();
				+//        test_inputs(32'h42480000, 32'hbf800000, 32'h42440000, 32'hc2480000, 32'hc2480000);
				     end
			
 
				 
			
 
				 
			
--- a/src/fpu32/fpu_add.qip
+++ b/src/fpu32/fpu_add.qip
@@ -1,4 +0,0 @@
 
				-set_global_assignment -name IP_TOOL_NAME "ALTFP_ADD_SUB"
			
 
				-set_global_assignment -name IP_TOOL_VERSION "20.1"
			
 
				-set_global_assignment -name IP_GENERATED_DEVICE_FAMILY "{Cyclone IV E}"
			
 
				-set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "fpu_add.v"]
			
--- a/src/fpu32/fpu_add.v
+++ b/src/fpu32/fpu_add.v
@@ -1,123 +0,0 @@
 
				-// megafunction wizard: %ALTFP_ADD_SUB%
			
 
				-// GENERATION: STANDARD
			
 
				-// VERSION: WM1.0
			
 
				-// MODULE: altfp_add_sub 
			
 
				-
			
 
				-// ============================================================
			
 
				-// File Name: fpu_add.v
			
 
				-// Megafunction Name(s):
			
 
				-// 			altfp_add_sub
			
 
				-//
			
 
				-// Simulation Library Files(s):
			
 
				-// 			
			
 
				-// ============================================================
			
 
				-// ************************************************************
			
 
				-// THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE!
			
 
				-//
			
 
				-// 20.1.0 Build 711 06/05/2020 SJ Lite Edition
			
 
				-// ************************************************************
			
 
				-
			
 
				-
			
 
				-//Copyright (C) 2020  Intel Corporation. All rights reserved.
			
 
				-//Your use of Intel Corporation's design tools, logic functions 
			
 
				-//and other software and tools, and any partner logic 
			
 
				-//functions, and any output files from any of the foregoing 
			
 
				-//(including device programming or simulation files), and any 
			
 
				-//associated documentation or information are expressly subject 
			
 
				-//to the terms and conditions of the Intel Program License 
			
 
				-//Subscription Agreement, the Intel Quartus Prime License Agreement,
			
 
				-//the Intel FPGA IP License Agreement, or other applicable license
			
 
				-//agreement, including, without limitation, that your use is for
			
 
				-//the sole purpose of programming logic devices manufactured by
			
 
				-//Intel and sold by Intel or its authorized distributors.  Please
			
 
				-//refer to the applicable agreement for further details, at
			
 
				-//https://fpgasoftware.intel.com/eula.
			
 
				-
			
 
				-
			
 
				-// synopsys translate_off
			
 
				-`timescale 1 ps / 1 ps
			
 
				-// synopsys translate_on
			
 
				-module fpu_add (aclr, clock, input_a, input_b, nan, overflow,
			
 
				-	result, underflow, zero);
			
 
				-
			
 
				-	input	  aclr;
			
 
				-	input	  clock;
			
 
				-	input	[31:0]  input_a;
			
 
				-	input	[31:0]  input_b;
			
 
				-	output	  nan;
			
 
				-	output	  overflow;
			
 
				-	output	[31:0]  result;
			
 
				-	output	  underflow;
			
 
				-	output	  zero;
			
 
				-
			
 
				-	wire  sub_wire0;
			
 
				-	wire  sub_wire1;
			
 
				-	wire [31:0] sub_wire2;
			
 
				-	wire  sub_wire3;
			
 
				-	wire  sub_wire4;
			
 
				-	wire  nan = sub_wire0;
			
 
				-	wire  overflow = sub_wire1;
			
 
				-	wire [31:0] result = sub_wire2[31:0];
			
 
				-	wire  underflow = sub_wire3;
			
 
				-	wire  zero = sub_wire4;
			
 
				-
			
 
				-	altfp_add_sub	altfp_add_sub_component (
			
 
				-				.aclr (aclr),
			
 
				-				.clock (clock),
			
 
				-				.dataa (input_a),
			
 
				-				.datab (input_b),
			
 
				-				.nan (sub_wire0),
			
 
				-				.overflow (sub_wire1),
			
 
				-				.result (sub_wire2),
			
 
				-				.underflow (sub_wire3),
			
 
				-				.zero (sub_wire4));
			
 
				-	defparam
			
 
				-		altfp_add_sub_component.denormal_support = "NO",
			
 
				-		altfp_add_sub_component.direction = "ADD",
			
 
				-		altfp_add_sub_component.optimize = "SPEED",
			
 
				-		altfp_add_sub_component.pipeline = 14,
			
 
				-		altfp_add_sub_component.reduced_functionality = "NO",
			
 
				-		altfp_add_sub_component.width_exp = 8,
			
 
				-		altfp_add_sub_component.width_man = 23;
			
 
				-
			
 
				-endmodule
			
 
				-
			
 
				-// ============================================================
			
 
				-// CNX file retrieval info
			
 
				-// ============================================================
			
 
				-// Retrieval info: PRIVATE: FPM_FORMAT NUMERIC "0"
			
 
				-// Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Cyclone IV E"
			
 
				-// Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX STRING "0"
			
 
				-// Retrieval info: PRIVATE: WIDTH_DATA NUMERIC "32"
			
 
				-// Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all
			
 
				-// Retrieval info: CONSTANT: DENORMAL_SUPPORT STRING "NO"
			
 
				-// Retrieval info: CONSTANT: DIRECTION STRING "ADD"
			
 
				-// Retrieval info: CONSTANT: OPTIMIZE STRING "SPEED"
			
 
				-// Retrieval info: CONSTANT: PIPELINE NUMERIC "14"
			
 
				-// Retrieval info: CONSTANT: REDUCED_FUNCTIONALITY STRING "NO"
			
 
				-// Retrieval info: CONSTANT: WIDTH_EXP NUMERIC "8"
			
 
				-// Retrieval info: CONSTANT: WIDTH_MAN NUMERIC "23"
			
 
				-// Retrieval info: USED_PORT: aclr 0 0 0 0 INPUT NODEFVAL "aclr"
			
 
				-// Retrieval info: USED_PORT: clock 0 0 0 0 INPUT NODEFVAL "clock"
			
 
				-// Retrieval info: USED_PORT: dataa 0 0 32 0 INPUT NODEFVAL "dataa[31..0]"
			
 
				-// Retrieval info: USED_PORT: datab 0 0 32 0 INPUT NODEFVAL "datab[31..0]"
			
 
				-// Retrieval info: USED_PORT: nan 0 0 0 0 OUTPUT NODEFVAL "nan"
			
 
				-// Retrieval info: USED_PORT: overflow 0 0 0 0 OUTPUT NODEFVAL "overflow"
			
 
				-// Retrieval info: USED_PORT: result 0 0 32 0 OUTPUT NODEFVAL "result[31..0]"
			
 
				-// Retrieval info: USED_PORT: underflow 0 0 0 0 OUTPUT NODEFVAL "underflow"
			
 
				-// Retrieval info: USED_PORT: zero 0 0 0 0 OUTPUT NODEFVAL "zero"
			
 
				-// Retrieval info: CONNECT: @aclr 0 0 0 0 aclr 0 0 0 0
			
 
				-// Retrieval info: CONNECT: @clock 0 0 0 0 clock 0 0 0 0
			
 
				-// Retrieval info: CONNECT: @dataa 0 0 32 0 dataa 0 0 32 0
			
 
				-// Retrieval info: CONNECT: @datab 0 0 32 0 datab 0 0 32 0
			
 
				-// Retrieval info: CONNECT: nan 0 0 0 0 @nan 0 0 0 0
			
 
				-// Retrieval info: CONNECT: overflow 0 0 0 0 @overflow 0 0 0 0
			
 
				-// Retrieval info: CONNECT: result 0 0 32 0 @result 0 0 32 0
			
 
				-// Retrieval info: CONNECT: underflow 0 0 0 0 @underflow 0 0 0 0
			
 
				-// Retrieval info: CONNECT: zero 0 0 0 0 @zero 0 0 0 0
			
 
				-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_add.v TRUE
			
 
				-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_add.inc FALSE
			
 
				-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_add.cmp FALSE
			
 
				-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_add.bsf FALSE
			
 
				-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_add_inst.v FALSE
			
 
				-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_add_bb.v FALSE
			
--- a/src/fpu32/fpu_div.qip
+++ b/src/fpu32/fpu_div.qip
@@ -1,4 +0,0 @@
 
				-set_global_assignment -name IP_TOOL_NAME "ALTFP_DIV"
			
 
				-set_global_assignment -name IP_TOOL_VERSION "20.1"
			
 
				-set_global_assignment -name IP_GENERATED_DEVICE_FAMILY "{Cyclone IV E}"
			
 
				-set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "fpu_div.v"]
			
--- a/src/fpu32/fpu_div.v
+++ b/src/fpu32/fpu_div.v
@@ -1,135 +0,0 @@
 
				-// megafunction wizard: %ALTFP_DIV%
			
 
				-// GENERATION: STANDARD
			
 
				-// VERSION: WM1.0
			
 
				-// MODULE: altfp_div 
			
 
				-
			
 
				-// ============================================================
			
 
				-// File Name: fpu_div.v
			
 
				-// Megafunction Name(s):
			
 
				-// 			altfp_div
			
 
				-//
			
 
				-// Simulation Library Files(s):
			
 
				-// 			
			
 
				-// ============================================================
			
 
				-// ************************************************************
			
 
				-// THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE!
			
 
				-//
			
 
				-// 20.1.0 Build 711 06/05/2020 SJ Lite Edition
			
 
				-// ************************************************************
			
 
				-
			
 
				-
			
 
				-//Copyright (C) 2020  Intel Corporation. All rights reserved.
			
 
				-//Your use of Intel Corporation's design tools, logic functions 
			
 
				-//and other software and tools, and any partner logic 
			
 
				-//functions, and any output files from any of the foregoing 
			
 
				-//(including device programming or simulation files), and any 
			
 
				-//associated documentation or information are expressly subject 
			
 
				-//to the terms and conditions of the Intel Program License 
			
 
				-//Subscription Agreement, the Intel Quartus Prime License Agreement,
			
 
				-//the Intel FPGA IP License Agreement, or other applicable license
			
 
				-//agreement, including, without limitation, that your use is for
			
 
				-//the sole purpose of programming logic devices manufactured by
			
 
				-//Intel and sold by Intel or its authorized distributors.  Please
			
 
				-//refer to the applicable agreement for further details, at
			
 
				-//https://fpgasoftware.intel.com/eula.
			
 
				-
			
 
				-
			
 
				-// synopsys translate_off
			
 
				-`timescale 1 ps / 1 ps
			
 
				-// synopsys translate_on
			
 
				-module fpu_div (
			
 
				-	aclr,
			
 
				-	clock,
			
 
				-	input_a,
			
 
				-	input_b,
			
 
				-	division_by_zero,
			
 
				-	nan,
			
 
				-	overflow,
			
 
				-	result,
			
 
				-	underflow,
			
 
				-	zero);
			
 
				-
			
 
				-	input	  aclr;
			
 
				-	input	  clock;
			
 
				-	input	[31:0]  input_a;
			
 
				-	input	[31:0]  input_b;
			
 
				-	output	  division_by_zero;
			
 
				-	output	  nan;
			
 
				-	output	  overflow;
			
 
				-	output	[31:0]  result;
			
 
				-	output	  underflow;
			
 
				-	output	  zero;
			
 
				-
			
 
				-	wire  sub_wire0;
			
 
				-	wire  sub_wire1;
			
 
				-	wire  sub_wire2;
			
 
				-	wire [31:0] sub_wire3;
			
 
				-	wire  sub_wire4;
			
 
				-	wire  sub_wire5;
			
 
				-	wire  division_by_zero = sub_wire0;
			
 
				-	wire  nan = sub_wire1;
			
 
				-	wire  overflow = sub_wire2;
			
 
				-	wire [31:0] result = sub_wire3[31:0];
			
 
				-	wire  underflow = sub_wire4;
			
 
				-	wire  zero = sub_wire5;
			
 
				-
			
 
				-	altfp_div	altfp_div_component (
			
 
				-				.aclr (aclr),
			
 
				-				.clock (clock),
			
 
				-				.dataa (input_a),
			
 
				-				.datab (input_b),
			
 
				-				.division_by_zero (sub_wire0),
			
 
				-				.nan (sub_wire1),
			
 
				-				.overflow (sub_wire2),
			
 
				-				.result (sub_wire3),
			
 
				-				.underflow (sub_wire4),
			
 
				-				.zero (sub_wire5));
			
 
				-	defparam
			
 
				-		altfp_div_component.denormal_support = "NO",
			
 
				-		altfp_div_component.optimize = "SPEED",
			
 
				-		altfp_div_component.pipeline = 14,
			
 
				-		altfp_div_component.reduced_functionality = "NO",
			
 
				-		altfp_div_component.width_exp = 8,
			
 
				-		altfp_div_component.width_man = 23;
			
 
				-
			
 
				-
			
 
				-endmodule
			
 
				-
			
 
				-// ============================================================
			
 
				-// CNX file retrieval info
			
 
				-// ============================================================
			
 
				-// Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Cyclone IV E"
			
 
				-// Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX STRING "0"
			
 
				-// Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all
			
 
				-// Retrieval info: CONSTANT: DENORMAL_SUPPORT STRING "NO"
			
 
				-// Retrieval info: CONSTANT: OPTIMIZE STRING "SPEED"
			
 
				-// Retrieval info: CONSTANT: PIPELINE NUMERIC "14"
			
 
				-// Retrieval info: CONSTANT: REDUCED_FUNCTIONALITY STRING "NO"
			
 
				-// Retrieval info: CONSTANT: WIDTH_EXP NUMERIC "8"
			
 
				-// Retrieval info: CONSTANT: WIDTH_MAN NUMERIC "23"
			
 
				-// Retrieval info: USED_PORT: aclr 0 0 0 0 INPUT NODEFVAL "aclr"
			
 
				-// Retrieval info: USED_PORT: clock 0 0 0 0 INPUT NODEFVAL "clock"
			
 
				-// Retrieval info: USED_PORT: dataa 0 0 32 0 INPUT NODEFVAL "dataa[31..0]"
			
 
				-// Retrieval info: USED_PORT: datab 0 0 32 0 INPUT NODEFVAL "datab[31..0]"
			
 
				-// Retrieval info: USED_PORT: division_by_zero 0 0 0 0 OUTPUT NODEFVAL "division_by_zero"
			
 
				-// Retrieval info: USED_PORT: nan 0 0 0 0 OUTPUT NODEFVAL "nan"
			
 
				-// Retrieval info: USED_PORT: overflow 0 0 0 0 OUTPUT NODEFVAL "overflow"
			
 
				-// Retrieval info: USED_PORT: result 0 0 32 0 OUTPUT NODEFVAL "result[31..0]"
			
 
				-// Retrieval info: USED_PORT: underflow 0 0 0 0 OUTPUT NODEFVAL "underflow"
			
 
				-// Retrieval info: USED_PORT: zero 0 0 0 0 OUTPUT NODEFVAL "zero"
			
 
				-// Retrieval info: CONNECT: @aclr 0 0 0 0 aclr 0 0 0 0
			
 
				-// Retrieval info: CONNECT: @clock 0 0 0 0 clock 0 0 0 0
			
 
				-// Retrieval info: CONNECT: @dataa 0 0 32 0 dataa 0 0 32 0
			
 
				-// Retrieval info: CONNECT: @datab 0 0 32 0 datab 0 0 32 0
			
 
				-// Retrieval info: CONNECT: division_by_zero 0 0 0 0 @division_by_zero 0 0 0 0
			
 
				-// Retrieval info: CONNECT: nan 0 0 0 0 @nan 0 0 0 0
			
 
				-// Retrieval info: CONNECT: overflow 0 0 0 0 @overflow 0 0 0 0
			
 
				-// Retrieval info: CONNECT: result 0 0 32 0 @result 0 0 32 0
			
 
				-// Retrieval info: CONNECT: underflow 0 0 0 0 @underflow 0 0 0 0
			
 
				-// Retrieval info: CONNECT: zero 0 0 0 0 @zero 0 0 0 0
			
 
				-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_div.v TRUE
			
 
				-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_div.inc FALSE
			
 
				-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_div.cmp FALSE
			
 
				-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_div.bsf FALSE
			
 
				-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_div_inst.v FALSE
			
 
				-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_div_bb.v FALSE
			
--- a/src/fpu32/fpu_mul.qip
+++ b/src/fpu32/fpu_mul.qip
@@ -1,5 +0,0 @@
 
				-set_global_assignment -name IP_TOOL_NAME "ALTFP_MULT"
			
 
				-set_global_assignment -name IP_TOOL_VERSION "18.1"
			
 
				-set_global_assignment -name IP_GENERATED_DEVICE_FAMILY "{Cyclone IV E}"
			
 
				-set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "fpu_mul.v"]
			
 
				-set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "fpu_mul.cmp"]
			
--- a/src/fpu32/fpu_mul.v
+++ b/src/fpu32/fpu_mul.v
--- a/src/fpu32/mult.v
+++ b/src/fpu32/mult.v
@@ -0,0 +1,276 @@
 
				+//IEEE Floating Point Multiplier (Single Precision)
			
 
				+//Copyright (C) Jonathan P Dawson 2013
			
 
				+//2013-12-12
			
 
				+//Multi-cycle multiplier: operands are taken over stb/ack handshakes, run through the state machine below, and the packed 32-bit result is offered on output_z.
			
 
				+module multiplier(
			
 
				+        input_a,
			
 
				+        input_b,
			
 
				+        input_a_stb,
			
 
				+        input_b_stb,
			
 
				+        output_z_ack,
			
 
				+        clk,
			
 
				+        rst,
			
 
				+        output_z,
			
 
				+        output_z_stb,
			
 
				+        input_a_ack,
			
 
				+        input_b_ack);
			
 
				+
			
 
				+  input     clk; //all registers update on the rising edge
			
 
				+  input     rst; //synchronous, active high (tested inside the clocked block)
			
 
				+
			
 
				+  input     [31:0] input_a; //operand a: sign [31], exponent [30:23], fraction [22:0]
			
 
				+  input     input_a_stb; //a is latched when this is high while input_a_ack is high
			
 
				+  output    input_a_ack; //driven from s_input_a_ack
			
 
				+
			
 
				+  input     [31:0] input_b; //operand b, unpacked with the same field layout as a
			
 
				+  input     input_b_stb; //b is latched when this is high while input_b_ack is high
			
 
				+  output    input_b_ack; //driven from s_input_b_ack
			
 
				+
			
 
				+  output    [31:0] output_z; //result word, driven from s_output_z
			
 
				+  output    output_z_stb; //driven from s_output_z_stb, raised in put_z
			
 
				+  input     output_z_ack; //consumer acknowledge; ends put_z once output_z_stb is already high
			
 
				+
			
 
				+  reg       s_output_z_stb;
			
 
				+  reg       [31:0] s_output_z;
			
 
				+  reg       s_input_a_ack;
			
 
				+  reg       s_input_b_ack;
			
 
				+
			
 
				+  reg       [3:0] state; //current state, one of the encodings below
			
 
				+  parameter get_a         = 4'd0, //wait for operand a
			
 
				+            get_b         = 4'd1, //wait for operand b
			
 
				+            unpack        = 4'd2, //split operands into sign/exponent/mantissa
			
 
				+            special_cases = 4'd3, //NaN, infinity and zero shortcuts
			
 
				+            normalise_a   = 4'd4, //shift a_m until bit 23 is set
			
 
				+            normalise_b   = 4'd5, //shift b_m until bit 23 is set
			
 
				+            multiply_0    = 4'd6, //form sign, exponent and raw product
			
 
				+            multiply_1    = 4'd7, //split product into mantissa and rounding bits
			
 
				+            normalise_1   = 4'd8, //shift result left until bit 23 is set
			
 
				+            normalise_2   = 4'd9, //shift result right while exponent is below -126
			
 
				+            round         = 4'd10, //apply rounding
			
 
				+            pack          = 4'd11, //assemble the 32-bit result
			
 
				+            put_z         = 4'd12; //offer the result and wait for acknowledge
			
 
				+
			
 
				+  reg       [31:0] a, b, z; //latched operands and the result word being assembled
			
 
				+  reg       [23:0] a_m, b_m, z_m; //mantissas; bit 23 is the explicit leading bit
			
 
				+  reg       [9:0] a_e, b_e, z_e; //exponents with the bias of 127 removed; read via $signed where negative values matter
			
 
				+  reg       a_s, b_s, z_s; //sign bits
			
 
				+  reg       guard, round_bit, sticky; //bits below z_m[0] used for rounding
			
 
				+  reg       [49:0] product; //holds a_m * b_m * 4
			
 
				+
			
 
				+  always @(posedge clk)
			
 
				+  begin
			
 
				+
			
 
				+    case(state)
			
 
				+
			
 
				+      get_a:
			
 
				+      begin
			
 
				+        s_input_a_ack <= 1;
			
 
				+        if (s_input_a_ack && input_a_stb) begin //ack was already high and the strobe is present
			
 
				+          a <= input_a;
			
 
				+          s_input_a_ack <= 0; //overrides the assignment to 1 above for this cycle
			
 
				+          state <= get_b;
			
 
				+        end
			
 
				+      end
			
 
				+
			
 
				+      get_b:
			
 
				+      begin
			
 
				+        s_input_b_ack <= 1;
			
 
				+        if (s_input_b_ack && input_b_stb) begin //same handshake as get_a
			
 
				+          b <= input_b;
			
 
				+          s_input_b_ack <= 0;
			
 
				+          state <= unpack;
			
 
				+        end
			
 
				+      end
			
 
				+
			
 
				+      unpack:
			
 
				+      begin
			
 
				+        a_m <= a[22 : 0]; //23 fraction bits into a 24-bit register, so bit 23 starts at 0
			
 
				+        b_m <= b[22 : 0];
			
 
				+        a_e <= a[30 : 23] - 127; //remove the exponent bias
			
 
				+        b_e <= b[30 : 23] - 127;
			
 
				+        a_s <= a[31];
			
 
				+        b_s <= b[31];
			
 
				+        state <= special_cases;
			
 
				+      end
			
 
				+
			
 
				+      special_cases:
			
 
				+      begin
			
 
				+        //if a is NaN or b is NaN return NaN
			
 
				+        if ((a_e == 128 && a_m != 0) || (b_e == 128 && b_m != 0)) begin //unbiased 128 is an all-ones exponent field
			
 
				+          z[31] <= 1;
			
 
				+          z[30:23] <= 255;
			
 
				+          z[22] <= 1;
			
 
				+          z[21:0] <= 0;
			
 
				+          state <= put_z;
			
 
				+        //if a is inf return inf
			
 
				+        end else if (a_e == 128) begin
			
 
				+          z[31] <= a_s ^ b_s;
			
 
				+          z[30:23] <= 255;
			
 
				+          z[22:0] <= 0;
			
 
				+          //if b is zero return NaN
			
 
				+          if (($signed(b_e) == -127) && (b_m == 0)) begin //these later assignments replace the inf encoding above
			
 
				+            z[31] <= 1;
			
 
				+            z[30:23] <= 255;
			
 
				+            z[22] <= 1;
			
 
				+            z[21:0] <= 0;
			
 
				+          end
			
 
				+          state <= put_z;
			
 
				+        //if b is inf return inf
			
 
				+        end else if (b_e == 128) begin
			
 
				+          z[31] <= a_s ^ b_s;
			
 
				+          z[30:23] <= 255;
			
 
				+          z[22:0] <= 0;
			
 
				+          //if a is zero return NaN
			
 
				+          if (($signed(a_e) == -127) && (a_m == 0)) begin //these later assignments replace the inf encoding above
			
 
				+            z[31] <= 1;
			
 
				+            z[30:23] <= 255;
			
 
				+            z[22] <= 1;
			
 
				+            z[21:0] <= 0;
			
 
				+          end
			
 
				+          state <= put_z;
			
 
				+        //if a is zero return zero
			
 
				+        end else if (($signed(a_e) == -127) && (a_m == 0)) begin
			
 
				+          z[31] <= a_s ^ b_s; //zero result keeps the product sign
			
 
				+          z[30:23] <= 0;
			
 
				+          z[22:0] <= 0;
			
 
				+          state <= put_z;
			
 
				+        //if b is zero return zero
			
 
				+        end else if (($signed(b_e) == -127) && (b_m == 0)) begin
			
 
				+          z[31] <= a_s ^ b_s;
			
 
				+          z[30:23] <= 0;
			
 
				+          z[22:0] <= 0;
			
 
				+          state <= put_z;
			
 
				+        end else begin
			
 
				+          //Denormalised Number
			
 
				+          if ($signed(a_e) == -127) begin
			
 
				+            a_e <= -126; //leading bit stays 0; normalise_a shifts the mantissa up
			
 
				+          end else begin
			
 
				+            a_m[23] <= 1; //normal number: make the implicit leading one explicit
			
 
				+          end
			
 
				+          //Denormalised Number
			
 
				+          if ($signed(b_e) == -127) begin
			
 
				+            b_e <= -126; //leading bit stays 0; normalise_b shifts the mantissa up
			
 
				+          end else begin
			
 
				+            b_m[23] <= 1; //normal number: make the implicit leading one explicit
			
 
				+          end
			
 
				+          state <= normalise_a;
			
 
				+        end
			
 
				+      end
			
 
				+
			
 
				+      normalise_a:
			
 
				+      begin
			
 
				+        if (a_m[23]) begin
			
 
				+          state <= normalise_b;
			
 
				+        end else begin
			
 
				+          a_m <= a_m << 1; //one bit position per clock until bit 23 is set
			
 
				+          a_e <= a_e - 1;
			
 
				+        end
			
 
				+      end
			
 
				+
			
 
				+      normalise_b:
			
 
				+      begin
			
 
				+        if (b_m[23]) begin
			
 
				+          state <= multiply_0;
			
 
				+        end else begin
			
 
				+          b_m <= b_m << 1; //one bit position per clock until bit 23 is set
			
 
				+          b_e <= b_e - 1;
			
 
				+        end
			
 
				+      end
			
 
				+
			
 
				+      multiply_0:
			
 
				+      begin
			
 
				+        z_s <= a_s ^ b_s;
			
 
				+        z_e <= a_e + b_e + 1; //+1 pairs with taking z_m from product[49:26] in multiply_1
			
 
				+        product <= a_m * b_m * 4; //the *4 moves the 48-bit product up into the 50-bit register
			
 
				+        state <= multiply_1;
			
 
				+      end
			
 
				+
			
 
				+      multiply_1:
			
 
				+      begin
			
 
				+        z_m <= product[49:26]; //top 24 bits become the result mantissa
			
 
				+        guard <= product[25];
			
 
				+        round_bit <= product[24];
			
 
				+        sticky <= (product[23:0] != 0); //set when any lower product bit is set
			
 
				+        state <= normalise_1;
			
 
				+      end
			
 
				+
			
 
				+      normalise_1:
			
 
				+      begin
			
 
				+        if (z_m[23] == 0) begin
			
 
				+          z_e <= z_e - 1;
			
 
				+          z_m <= z_m << 1;
			
 
				+          z_m[0] <= guard; //later assignment: bit 0 takes guard instead of the shifted-in 0
			
 
				+          guard <= round_bit;
			
 
				+          round_bit <= 0;
			
 
				+        end else begin
			
 
				+          state <= normalise_2;
			
 
				+        end
			
 
				+      end
			
 
				+
			
 
				+      normalise_2:
			
 
				+      begin
			
 
				+        if ($signed(z_e) < -126) begin //exponent below the minimum: shift right one bit per clock
			
 
				+          z_e <= z_e + 1;
			
 
				+          z_m <= z_m >> 1;
			
 
				+          guard <= z_m[0];
			
 
				+          round_bit <= guard;
			
 
				+          sticky <= sticky | round_bit; //bits shifted past round_bit accumulate in sticky
			
 
				+        end else begin
			
 
				+          state <= round;
			
 
				+        end
			
 
				+      end
			
 
				+
			
 
				+      round:
			
 
				+      begin
			
 
				+        if (guard && (round_bit | sticky | z_m[0])) begin //round up when above half, or exactly half with an odd lsb
			
 
				+          z_m <= z_m + 1;
			
 
				+          if (z_m == 24'hffffff) begin //the increment wraps z_m to 0, so bump the exponent
			
 
				+            z_e <=z_e + 1;
			
 
				+          end
			
 
				+        end
			
 
				+        state <= pack;
			
 
				+      end
			
 
				+
			
 
				+      pack:
			
 
				+      begin
			
 
				+        z[22 : 0] <= z_m[22:0]; //leading bit 23 is not stored
			
 
				+        z[30 : 23] <= z_e[7:0] + 127; //re-apply the exponent bias
			
 
				+        z[31] <= z_s;
			
 
				+        if ($signed(z_e) == -126 && z_m[23] == 0) begin //no leading one at the minimum exponent: exponent field 0
			
 
				+          z[30 : 23] <= 0;
			
 
				+        end
			
 
				+        //if overflow occurs, return inf
			
 
				+        if ($signed(z_e) > 127) begin //these later assignments replace the fields written above
			
 
				+          z[22 : 0] <= 0;
			
 
				+          z[30 : 23] <= 255;
			
 
				+          z[31] <= z_s;
			
 
				+        end
			
 
				+        state <= put_z;
			
 
				+      end
			
 
				+
			
 
				+      put_z:
			
 
				+      begin
			
 
				+        s_output_z_stb <= 1;
			
 
				+        s_output_z <= z;
			
 
				+        if (s_output_z_stb && output_z_ack) begin //strobe was already high and the consumer acknowledged
			
 
				+          s_output_z_stb <= 0;
			
 
				+          state <= get_a;
			
 
				+        end
			
 
				+      end
			
 
				+
			
 
				+    endcase
			
 
				+    //reset is tested after the case so its assignments take priority over those made above
			
 
				+    if (rst == 1) begin
			
 
				+      state <= get_a;
			
 
				+      s_input_a_ack <= 0;
			
 
				+      s_input_b_ack <= 0;
			
 
				+      s_output_z_stb <= 0;
			
 
				+    end
			
 
				+
			
 
				+  end
			
 
				+  assign input_a_ack = s_input_a_ack;
			
 
				+  assign input_b_ack = s_input_b_ack;
			
 
				+  assign output_z_stb = s_output_z_stb;
			
 
				+  assign output_z = s_output_z;
			
 
				+
			
 
				+endmodule
			
--- a/src/root.sv
+++ b/src/root.sv
@@ -1,5 +1,3 @@
 
				-
			
 
				-
			
 
				 // synopsys translate_off
			
 
				 `timescale 1 ps / 1 ps
			
 
				 // synopsys translate_on
			
@@ -43,6 +41,8 @@ module root_tb ();
 
				 
			
 
				         #60ps;
			
 
				         KEYS = 2'b11; // Release keys
			
 
				+        #7000ps;
			
 
				+        $finish();
			
 
				     end
			
 
				 
			
 
				 endmodule : root_tb