Min пре 4 година
родитељ
комит
ff722bedc5

+ 4 - 2
altera_devel.qsf

@@ -85,7 +85,9 @@ set_global_assignment -name EDA_TEST_BENCH_ENABLE_STATUS TEST_BENCH_MODE -sectio
 set_global_assignment -name EDA_NATIVELINK_SIMULATION_TEST_BENCH root_tb -section_id eda_simulation
 set_global_assignment -name EDA_TEST_BENCH_NAME root_tb -section_id eda_simulation
 set_global_assignment -name EDA_DESIGN_INSTANCE_NAME NA -section_id root_tb
-set_global_assignment -name EDA_TEST_BENCH_MODULE_NAME root_tb -section_id root_tb
-set_global_assignment -name EDA_TEST_BENCH_FILE src/root.sv -section_id root_tb
+set_global_assignment -name EDA_TEST_BENCH_MODULE_NAME fpu32_tb -section_id root_tb
 set_global_assignment -name SOURCE_FILE db/altera_devel.cmp.rdb
+set_global_assignment -name EDA_TEST_BENCH_FILE src/root.sv -section_id root_tb
+set_global_assignment -name EDA_TEST_BENCH_FILE src/fpu32/fpu32.sv -section_id root_tb
+set_global_assignment -name QIP_FILE altera/fpu32_adder.qip
 set_instance_assignment -name PARTITION_HIERARCHY root_partition -to | -section_id Top

+ 32 - 0
scripts/fpu_test_gen.py

@@ -0,0 +1,32 @@
+import numpy as np
+import os
+
+def generate_fp16_tb(cases, filename, dtype=np.float16):
+    """Write `cases` random test vectors to `filename` for $readmemh.
+
+    Each line holds four hex words: x, y, x + y and x * y, every word printed
+    most-significant digit first so it reads back as the IEEE-754 bit pattern.
+    Raises ValueError for any dtype other than np.float16 / np.float32.
+    """
+    if dtype == np.float16:
+        dsize = 2
+    elif dtype == np.float32:
+        dsize = 4
+    else:
+        raise ValueError(f"Unknown dtype {dtype}")
+
+    x = np.frombuffer(os.urandom(cases * dsize), dtype=dtype)
+    y = np.frombuffer(os.urandom(cases * dsize), dtype=dtype)
+    # Random bit patterns include inf/NaN; those warnings are expected here.
+    with np.errstate(all='ignore'):
+        columns = (x, y, x + y, x * y)
+    # View each column as same-width unsigned ints to format by value, not by
+    # host byte order (tobytes().hex() is byte-swapped on little-endian hosts).
+    words = zip(*(col.view(f"u{dsize}") for col in columns))
+    with open(filename, 'w') as f:
+        for row in words:
+            f.write(' '.join(f"{int(w):0{dsize * 2}x}" for w in row) + '\n')
+
+
+if __name__ == '__main__':
+    generate_fp16_tb(30, 'fp32_test.hex', dtype=np.float32)  # 30 rows: same depth as test_mem[29:0] in src/fpu32/fpu32.sv; written to the current directory, while the testbench reads scripts/fp32_test.hex

+ 20 - 0
simulation/modelsim/sim_fpu32.do

@@ -0,0 +1,20 @@
+# ModelSim script: rebuilds the rtl_work library, compiles src/fpu32/fpu32.sv,
+# loads fpu32_tb and adds the adder's clock, reset, operands and result to the wave window.
+# NOTE(review): no `run` command is issued here, so simulation time must be advanced manually.
+#transcript on
+if {[file exists rtl_work]} {
+	vdel -lib rtl_work -all
+}
+# The repository root is taken to be two levels above the current directory
+# (presumably the script is started from simulation/modelsim -- confirm).
+set rootdir [pwd]/../..
+puts "Root Directory $rootdir"
+vlib rtl_work
+vmap work rtl_work
+
+# NOTE(review): fpu32.sv `include`s adder.v and mult.v, which live in src/fpu32, but only
+# src is on +incdir -- confirm the simulator resolves includes relative to the including file.
+vlog -sv -work work +incdir+${rootdir}/src ${rootdir}/src/fpu32/fpu32.sv
+vsim -t 1ps -L altera_ver -L lpm_ver -L sgate_ver -L altera_mf_ver -L altera_lnsim_ver -L cycloneive_ver -L rtl_work -L work -voptargs="+acc" fpu32_tb
+
+view structure
+view signals
+
+add wave -noupdate -label CLK /fpu32_tb/clk
+add wave -noupdate -label RESET /fpu32_tb/reset
+add wave -noupdate -label INPUT_A -radix float32 /fpu32_tb/input_a
+add wave -noupdate -label INPUT_B -radix float32 /fpu32_tb/input_b
+add wave -noupdate -label RESULT_ADDER -radix float32 /fpu32_tb/result_add

+ 305 - 0
src/fpu32/adder.v

@@ -0,0 +1,305 @@
+//IEEE Floating Point Adder (Single Precision)
+//Copyright (C) Jonathan P Dawson 2013
+//2013-12-12
+
+module adder(
+        input_a,
+        input_b,
+        input_a_stb,
+        input_b_stb,
+        output_z_ack,
+        clk,
+        rst,
+        output_z,
+        output_z_stb,
+        input_a_ack,
+        input_b_ack);
+
+  input     clk; // every register below is updated on the rising edge
+  input     rst; // synchronous: sampled inside the clocked block, returns the FSM to get_input
+
+  input     [31:0] input_a;
+  input     input_a_stb; // operand A is captured in a cycle where both input_a_stb and input_a_ack are high
+  output    input_a_ack;
+
+  input     [31:0] input_b;
+  input     input_b_stb; // operand B is captured in a cycle where both input_b_stb and input_b_ack are high
+  output    input_b_ack;
+
+  output    [31:0] output_z;
+  output    output_z_stb; // high while a result is being presented on output_z
+  input     output_z_ack; // consumer handshake: releases the result and restarts the FSM
+
+  reg       s_output_z_stb;
+  reg       [31:0] s_output_z;
+  reg       s_input_a_ack;
+  reg       s_input_b_ack;
+
+  reg       [3:0] state;
+  parameter get_a         = 4'd0, // no transition in this module targets get_a
+            get_b         = 4'd1, // only reachable from get_a
+            unpack        = 4'd2,
+            special_cases = 4'd3,
+            align         = 4'd4,
+            add_0         = 4'd5,
+            add_1         = 4'd6,
+            normalise_1   = 4'd7,
+            normalise_2   = 4'd8,
+            round         = 4'd9,
+            pack          = 4'd10,
+            put_z         = 4'd11,
+            get_input     = 4'd12; // captures both operands at once; entered after reset and after put_z
+
+  reg       [31:0] a, b, z;
+  reg       [26:0] a_m, b_m; // mantissa with three extra low-order bits appended (see unpack)
+  reg       [23:0] z_m;
+  reg       [9:0] a_e, b_e, z_e; // exponents with the 127 bias removed, compared as signed values
+  reg       a_s, b_s, z_s;
+  reg       guard, round_bit, sticky;
+  reg       [27:0] sum; // one bit wider than the mantissas so the carry out of the add is kept
+
+  always @(posedge clk)
+  begin
+
+    case(state)
+
+      get_a:
+      begin
+        s_input_a_ack <= 1;
+        if (s_input_a_ack && input_a_stb) begin
+          a <= input_a;
+          s_input_a_ack <= 0;
+          state <= get_b;
+        end
+      end
+
+      get_input:
+      begin
+        s_input_a_ack <= 1;
+        s_input_b_ack <= 1;
+        if (s_input_a_ack && input_a_stb && s_input_b_ack && input_b_stb) begin
+          a <= input_a;
+          b <= input_b;
+          s_input_a_ack <= 0; // later non-blocking assignment overrides the "<= 1" above
+          s_input_b_ack <= 0;
+          state <= unpack;
+        end
+      end
+
+      get_b:
+      begin
+        s_input_b_ack <= 1;
+        if (s_input_b_ack && input_b_stb) begin
+          b <= input_b;
+          s_input_b_ack <= 0;
+          state <= unpack;
+        end
+      end
+
+      unpack:
+      begin
+        a_m <= {a[22 : 0], 3'd0};
+        b_m <= {b[22 : 0], 3'd0};
+        a_e <= a[30 : 23] - 127;
+        b_e <= b[30 : 23] - 127;
+        a_s <= a[31];
+        b_s <= b[31];
+        state <= special_cases;
+      end
+
+      special_cases:
+      begin
+        //if a is NaN or b is NaN return NaN
+        if ((a_e == 128 && a_m != 0) || (b_e == 128 && b_m != 0)) begin
+          z[31] <= 1;
+          z[30:23] <= 255;
+          z[22] <= 1;
+          z[21:0] <= 0;
+          state <= put_z;
+        //if a is inf return inf
+        end else if (a_e == 128) begin
+          z[31] <= a_s;
+          z[30:23] <= 255;
+          z[22:0] <= 0;
+          //if a is inf and signs don't match return nan
+          if ((b_e == 128) && (a_s != b_s)) begin
+              z[31] <= b_s;
+              z[30:23] <= 255;
+              z[22] <= 1;
+              z[21:0] <= 0;
+          end
+          state <= put_z;
+        //if b is inf return inf
+        end else if (b_e == 128) begin
+          z[31] <= b_s;
+          z[30:23] <= 255;
+          z[22:0] <= 0;
+          state <= put_z;
+        //if a and b are both zero return zero, negative only when both inputs are negative
+        end else if ((($signed(a_e) == -127) && (a_m == 0)) && (($signed(b_e) == -127) && (b_m == 0))) begin
+          z[31] <= a_s & b_s;
+          z[30:23] <= b_e[7:0] + 127;
+          z[22:0] <= b_m[26:3];
+          state <= put_z;
+        //if a is zero return b
+        end else if (($signed(a_e) == -127) && (a_m == 0)) begin
+          z[31] <= b_s;
+          z[30:23] <= b_e[7:0] + 127;
+          z[22:0] <= b_m[26:3];
+          state <= put_z;
+        //if b is zero return a
+        end else if (($signed(b_e) == -127) && (b_m == 0)) begin
+          z[31] <= a_s;
+          z[30:23] <= a_e[7:0] + 127;
+          z[22:0] <= a_m[26:3];
+          state <= put_z;
+        end else begin
+          //Denormalised a: use exponent -126 and leave bit 26 clear; otherwise set bit 26
+          if ($signed(a_e) == -127) begin
+            a_e <= -126;
+          end else begin
+            a_m[26] <= 1;
+          end
+          //Denormalised b: use exponent -126 and leave bit 26 clear; otherwise set bit 26
+          if ($signed(b_e) == -127) begin
+            b_e <= -126;
+          end else begin
+            b_m[26] <= 1;
+          end
+          state <= align;
+        end
+      end
+
+      align:
+      begin
+        if ($signed(a_e) > $signed(b_e)) begin
+          b_e <= b_e + 1;
+          b_m <= b_m >> 1;
+          b_m[0] <= b_m[0] | b_m[1]; // overrides bit 0 of the shift: bits shifted out stay visible in bit 0
+        end else if ($signed(a_e) < $signed(b_e)) begin
+          a_e <= a_e + 1;
+          a_m <= a_m >> 1;
+          a_m[0] <= a_m[0] | a_m[1]; // overrides bit 0 of the shift: bits shifted out stay visible in bit 0
+        end else begin
+          state <= add_0; // one bit position per clock until the exponents match
+        end
+      end
+
+      add_0:
+      begin
+        z_e <= a_e;
+        if (a_s == b_s) begin
+          sum <= a_m + b_m;
+          z_s <= a_s;
+        end else begin
+          if (a_m >= b_m) begin
+            sum <= a_m - b_m;
+            z_s <= a_s;
+          end else begin
+            sum <= b_m - a_m;
+            z_s <= b_s;
+          end
+        end
+        state <= add_1;
+      end
+
+      add_1:
+      begin
+        if (sum[27]) begin // carry out of the add: take the top 24 bits and bump the exponent
+          z_m <= sum[27:4];
+          guard <= sum[3];
+          round_bit <= sum[2];
+          sticky <= sum[1] | sum[0];
+          z_e <= z_e + 1;
+        end else begin
+          z_m <= sum[26:3];
+          guard <= sum[2];
+          round_bit <= sum[1];
+          sticky <= sum[0];
+        end
+        state <= normalise_1;
+      end
+
+      normalise_1:
+      begin
+        if (z_m[23] == 0 && $signed(z_e) > -126) begin // shift left one bit per clock, never below exponent -126
+          z_e <= z_e - 1;
+          z_m <= z_m << 1;
+          z_m[0] <= guard;
+          guard <= round_bit;
+          round_bit <= 0;
+        end else begin
+          state <= normalise_2;
+        end
+      end
+
+      normalise_2:
+      begin
+        if ($signed(z_e) < -126) begin // shift right one bit per clock until the exponent reaches -126
+          z_e <= z_e + 1;
+          z_m <= z_m >> 1;
+          guard <= z_m[0];
+          round_bit <= guard;
+          sticky <= sticky | round_bit;
+        end else begin
+          state <= round;
+        end
+      end
+
+      round:
+      begin
+        if (guard && (round_bit | sticky | z_m[0])) begin
+          z_m <= z_m + 1;
+          if (z_m == 24'hffffff) begin // the increment wraps z_m to zero, so the exponent is raised
+            z_e <=z_e + 1;
+          end
+        end
+        state <= pack;
+      end
+
+      pack:
+      begin
+        z[22 : 0] <= z_m[22:0];
+        z[30 : 23] <= z_e[7:0] + 127;
+        z[31] <= z_s;
+        if ($signed(z_e) == -126 && z_m[23] == 0) begin // bit 23 clear at exponent -126: exponent field is written as 0
+          z[30 : 23] <= 0;
+        end
+        if ($signed(z_e) == -126 && z_m[23:0] == 24'h0) begin
+          z[31] <= 1'b0; // FIX SIGN BUG: -a + a = +0.
+        end
+        //if overflow occurs, return inf
+        if ($signed(z_e) > 127) begin
+          z[22 : 0] <= 0;
+          z[30 : 23] <= 255;
+          z[31] <= z_s;
+        end
+        state <= put_z;
+      end
+
+      put_z:
+      begin
+        s_output_z_stb <= 1;
+        s_output_z <= z;
+        if (s_output_z_stb && output_z_ack) begin
+          s_output_z_stb <= 0;
+          state <= get_input;
+        end
+      end
+
+    endcase
+
+    if (rst == 1) begin // placed after the case so these assignments override anything scheduled above
+      state <= get_input;
+      s_input_a_ack <= 0;
+      s_input_b_ack <= 0;
+      s_output_z_stb <= 0;
+    end
+
+  end
+  assign input_a_ack = s_input_a_ack;
+  assign input_b_ack = s_input_b_ack;
+  assign output_z_stb = s_output_z_stb;
+  assign output_z = s_output_z;
+
+endmodule

+ 177 - 0
src/fpu32/adder_piped.v

@@ -0,0 +1,177 @@
+
+
+// Pipelined variant of the single-precision adder (work in progress).
+// Implemented so far, one register stage each:
+//   stage 0: unpack sign / exponent / mantissa
+//   stage 1: special cases (NaN, inf, zero) and hidden-bit insertion
+//   stage 2: exponent alignment in a single step
+//   stage 3: mantissa add / subtract
+// The normalise, round and pack stages do not exist yet, so output_z is not
+// driven and the reset input is not used.
+module adder_piped(
+    input wire reset, clk,
+    input [31:0] input_a, input_b,
+    output [31:0] output_z
+);
+
+// Right-shift a mantissa by `shift` places in one step while folding every
+// bit that falls off the end into bit 0. This gives the same result as
+// repeating "m >> 1, bit0 = old bit0 | old bit1" `shift` times.
+function [26:0] align_mantissa;
+    input [26:0] mantissa;
+    input [9:0] shift;
+    reg [26:0] lost;
+    begin
+        if (shift >= 27) begin
+            // Everything is shifted out: only the sticky bit can remain.
+            align_mantissa = {26'd0, |mantissa};
+        end else begin
+            lost = mantissa & ~(27'h7ffffff << shift);
+            align_mantissa = (mantissa >> shift) | {26'd0, |lost};
+        end
+    end
+endfunction
+
+// ========================
+// Stage 0: unpack
+// ========================
+reg [26:0] s0_a_m, s0_b_m;  // Mantissa with three extra low-order bits
+reg [9:0] s0_a_e, s0_b_e;   // Exponent with the 127 bias removed
+reg s0_a_s, s0_b_s;         // Sign
+
+always @(posedge clk) begin
+    s0_a_m <= {input_a[22 : 0], 3'd0};
+    s0_b_m <= {input_b[22 : 0], 3'd0};
+    s0_a_e <= input_a[30 : 23] - 127;
+    s0_b_e <= input_b[30 : 23] - 127;
+    s0_a_s <= input_a[31];
+    s0_b_s <= input_b[31];
+end
+
+// ========================
+// Stage 1: special cases
+// ========================
+reg [26:0] s1_a_m, s1_b_m;  // Mantissa
+reg [9:0] s1_a_e, s1_b_e;   // Exponent
+reg s1_a_s, s1_b_s;         // Sign
+reg [31:0] s1_z;            // Final result when s1_sp is set
+reg s1_sp;                  // Special case flag
+
+always @(posedge clk) begin
+    //if a is NaN or b is NaN return NaN
+    if ((s0_a_e == 128 && s0_a_m != 0) || (s0_b_e == 128 && s0_b_m != 0)) begin
+        s1_z[31] <= 1;
+        s1_z[30:23] <= 255;
+        s1_z[22] <= 1;
+        s1_z[21:0] <= 0;
+        s1_sp <= 1;
+    //if a is inf return inf
+    end else if (s0_a_e == 128) begin
+        s1_z[31] <= s0_a_s;
+        s1_z[30:23] <= 255;
+        s1_z[22:0] <= 0;
+        //if a is inf and signs don't match return nan
+        if ((s0_b_e == 128) && (s0_a_s != s0_b_s)) begin
+            s1_z[31] <= s0_b_s;
+            s1_z[30:23] <= 255;
+            s1_z[22] <= 1;
+            s1_z[21:0] <= 0;
+        end
+        s1_sp <= 1;
+    //if b is inf return inf
+    end else if (s0_b_e == 128) begin
+        s1_z[31] <= s0_b_s;
+        s1_z[30:23] <= 255;
+        s1_z[22:0] <= 0;
+        s1_sp <= 1;
+    //if a and b are both zero return zero, negative only when both inputs are negative
+    end else if ((($signed(s0_a_e) == -127) && (s0_a_m == 0)) && (($signed(s0_b_e) == -127) && (s0_b_m == 0))) begin
+        s1_z[31] <= s0_a_s & s0_b_s;
+        s1_z[30:23] <= s0_b_e[7:0] + 127;
+        s1_z[22:0] <= s0_b_m[26:3];
+        s1_sp <= 1;
+    //if a is zero return b
+    end else if (($signed(s0_a_e) == -127) && (s0_a_m == 0)) begin
+        s1_z[31] <= s0_b_s;
+        s1_z[30:23] <= s0_b_e[7:0] + 127;
+        s1_z[22:0] <= s0_b_m[26:3];
+        s1_sp <= 1;
+    //if b is zero return a
+    end else if (($signed(s0_b_e) == -127) && (s0_b_m == 0)) begin
+        s1_z[31] <= s0_a_s;
+        s1_z[30:23] <= s0_a_e[7:0] + 127;
+        s1_z[22:0] <= s0_a_m[26:3];
+        s1_sp <= 1;
+    end else begin
+        //Denormalised a keeps bit 26 clear at exponent -126; otherwise bit 26 is set
+        if ($signed(s0_a_e) == -127) begin
+            s1_a_e <= -126;
+            s1_a_m <= s0_a_m;
+        end else begin
+            s1_a_e <= s0_a_e;
+            s1_a_m <= {1'b1, s0_a_m[25:0]};
+        end
+        //Denormalised b keeps bit 26 clear at exponent -126; otherwise bit 26 is set
+        if ($signed(s0_b_e) == -127) begin
+            s1_b_e <= -126;
+            s1_b_m <= s0_b_m;
+        end else begin
+            s1_b_e <= s0_b_e;
+            s1_b_m <= {1'b1, s0_b_m[25:0]};
+        end
+        s1_a_s <= s0_a_s;
+        s1_b_s <= s0_b_s;
+        s1_sp <= 0;
+    end
+end
+
+// ========================
+// Stage 2: align exponents
+// ========================
+reg [26:0] s2_a_m, s2_b_m;  // Mantissa
+reg [9:0] s2_a_e, s2_b_e;   // Exponent
+reg s2_a_s, s2_b_s;         // Sign
+reg [31:0] s2_z;
+reg s2_sp;  // Special case flag
+
+// Exponent gaps; only the one matching the branch taken below is meaningful.
+wire [9:0] s2_gap_ab = s1_a_e - s1_b_e;
+wire [9:0] s2_gap_ba = s1_b_e - s1_a_e;
+
+always @(posedge clk) begin
+    s2_z <= s1_z;
+    s2_sp <= s1_sp;
+end
+
+always @(posedge clk) begin
+    if ($signed(s1_a_e) > $signed(s1_b_e)) begin
+        // b has the smaller exponent: raise it to a's and shift b's mantissa down
+        s2_b_e <= s1_a_e;
+        s2_b_m <= align_mantissa(s1_b_m, s2_gap_ab);
+        s2_a_e <= s1_a_e;
+        s2_a_m <= s1_a_m;
+    end else if ($signed(s1_a_e) < $signed(s1_b_e)) begin
+        // a has the smaller exponent: raise it to b's and shift a's mantissa down
+        s2_a_e <= s1_b_e;
+        s2_a_m <= align_mantissa(s1_a_m, s2_gap_ba);
+        s2_b_e <= s1_b_e;
+        s2_b_m <= s1_b_m;
+    end else begin
+        s2_a_e <= s1_a_e;
+        s2_a_m <= s1_a_m;
+        s2_b_e <= s1_b_e;
+        s2_b_m <= s1_b_m;
+    end
+    s2_a_s <= s1_a_s;
+    s2_b_s <= s1_b_s;
+end
+
+
+// ========================
+// Stage 3: add / subtract mantissas
+// ========================
+reg [27:0] s3_sum;          // One bit wider than the mantissas to keep the carry
+reg [9:0] s3_z_e;           // Result exponent (both operands share it after stage 2)
+reg s3_z_s;                 // Result sign
+reg [31:0] s3_z;
+reg s3_sp;  // Special case flag
+
+always @(posedge clk) begin
+    s3_z <= s2_z;
+    s3_sp <= s2_sp;
+end
+
+always @(posedge clk) begin
+    s3_z_e <= s2_a_e;
+    if (s2_a_s == s2_b_s) begin
+        s3_sum <= s2_a_m + s2_b_m;
+        s3_z_s <= s2_a_s;
+    end else begin
+        // Opposite signs: subtract the smaller magnitude and take the larger one's sign
+        if (s2_a_m >= s2_b_m) begin
+            s3_sum <= s2_a_m - s2_b_m;
+            s3_z_s <= s2_a_s;
+        end else begin
+            s3_sum <= s2_b_m - s2_a_m;
+            s3_z_s <= s2_b_s;
+        end
+    end
+end
+
+// TODO: normalise, round and pack s3_sum / s3_z_e / s3_z_s, select s3_z when
+// s3_sp is set, and drive output_z from the result.
+
+endmodule

+ 60 - 50
src/fpu32/fpu32.sv

@@ -1,3 +1,6 @@
+`include "adder.v"
+`include "mult.v"
+
 // synopsys translate_off
 `timescale 1 ps / 1 ps
 // synopsys translate_on
@@ -7,61 +10,68 @@ module fpu32_tb();
     reg [31:0] input_a, input_b, result_add, result_div, result_mul;
     wire nan, overflow, underflow, zero;
 
-    fpu_add adder(
-        .aclr(reset),
-        .clock(clk),
-        .input_a(input_a),
-        .input_b(input_b),
-        .result(result_add)
-    );
-
-    fpu_div divider(
-        .aclr(reset),
-        .clock(clk),
-        .input_a(input_a),
-        .input_b(input_b),
-        .result(result_div)
-    );
-
-    fpu_mul multipler(
-        .aclr(reset),
-        .clock(clk),
-        .dataa(input_a),
-        .datab(input_b),
-        .result(result_mul)
-    );
-
-    task test_inputs;
-        input [31:0] in_a, in_b, expected_add, expected_mul, expected_div;
-        input_a = in_a;
-        input_b = in_b;
-        #10ps;
-//        assert(exception_adder == 0);
-//        assert(exception_mult == 0);
-//        assert(overflow == 0);
-//        assert(underflow == 0);
-        if(result_add == expected_add)
-            $display("PASS: %H + %H = %H", input_a, input_b, expected_add);
-        else
-            $error("FAIL ADD: a=%H b=%H c=%H, expected c=%H", input_a, input_b, result_add, expected_add);
-        if(result_mul == expected_mul)
-            $display("PASS: %H * %H = %H", input_a, input_b, expected_mul);
-        else
-            $error("FAIL MUL: a=%H b=%H c=%H, expected c=%H", input_a, input_b, result_mul, expected_mul);
-        if(result_div == expected_div)
-            $display("PASS: %H * %H = %H", input_a, input_b, expected_div);
-        else
-            $error("FAIL DIV: a=%H b=%H c=%H, expected c=%H", input_a, input_b, result_div, expected_div);
-        #10ps;
-    endtask : test_inputs
-
-    initial forever #15ps clk = ~clk;
+
+    reg adder_input_a_stb, adder_input_b_stb, adder_output_z_ack;
+    wire adder_input_a_ack, adder_input_b_ack, adder_output_z_stb;
+
+	adder add0(
+	    .clk(clk),
+	    .rst(reset),
+	    .input_a(input_a),
+	    .input_a_stb(adder_input_a_stb),
+	    .input_a_ack(adder_input_a_ack),
+	    .input_b(input_b),
+	    .input_b_stb(adder_input_b_stb),
+	    .input_b_ack(adder_input_b_ack),
+	    .output_z(result_add),
+	    .output_z_ack(adder_output_z_ack),
+	    .output_z_stb(adder_output_z_stb)
+	);
+
+
+    initial forever #5 clk = ~clk;
+
+    reg [31:0] test_mem [29:0][3:0];
+
+    initial $readmemh("../../scripts/fp32_test.hex", test_mem);
 
     initial begin
         clk = 0;
         reset = 1;
-        test_inputs(32'h42480000, 32'hbf800000, 32'h42440000, 32'hc2480000, 32'hc2480000);
+        adder_input_a_stb = 0;
+        adder_input_b_stb = 0;
+        adder_output_z_ack = 0;
+
+        #20;
+        reset = 0;
+
+        foreach(test_mem[i]) begin  // each row holds a, b, a+b, a*b in columns 0..3
+            input_a = test_mem[i][0];
+            input_b = test_mem[i][1];
+            adder_input_a_stb = 1;
+            adder_input_b_stb = 1;
+
+            wait(adder_input_a_ack && adder_input_b_ack);  // the adder captures operands only with both acks high
+            #15;
+            adder_input_a_stb = 0;
+            adder_input_b_stb = 0;
+
+            @(posedge adder_output_z_stb);
+            adder_output_z_ack = 1;
+            if(result_add == test_mem[i][2])  // column 2 is the expected sum; column 3 is the product
+                $display("PASS: %H + %H = %H", input_a, input_b, result_add);
+            else
+                $error("FAIL ADD: %H + %H = %H, expected %H", input_a, input_b, result_add, test_mem[i][2]);
+
+            @(negedge adder_output_z_stb);
+            adder_output_z_ack = 0;
+            #10;
+        end
 
+//        assert(result_add == 32'h42440000);
+//        assert(result_mul == 32'hc2480000);
+//        $finish();
+//        test_inputs(32'h42480000, 32'hbf800000, 32'h42440000, 32'hc2480000, 32'hc2480000);
     end
 
 

+ 0 - 4
src/fpu32/fpu_add.qip

@@ -1,4 +0,0 @@
-set_global_assignment -name IP_TOOL_NAME "ALTFP_ADD_SUB"
-set_global_assignment -name IP_TOOL_VERSION "20.1"
-set_global_assignment -name IP_GENERATED_DEVICE_FAMILY "{Cyclone IV E}"
-set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "fpu_add.v"]

+ 0 - 123
src/fpu32/fpu_add.v

@@ -1,123 +0,0 @@
-// megafunction wizard: %ALTFP_ADD_SUB%
-// GENERATION: STANDARD
-// VERSION: WM1.0
-// MODULE: altfp_add_sub 
-
-// ============================================================
-// File Name: fpu_add.v
-// Megafunction Name(s):
-// 			altfp_add_sub
-//
-// Simulation Library Files(s):
-// 			
-// ============================================================
-// ************************************************************
-// THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE!
-//
-// 20.1.0 Build 711 06/05/2020 SJ Lite Edition
-// ************************************************************
-
-
-//Copyright (C) 2020  Intel Corporation. All rights reserved.
-//Your use of Intel Corporation's design tools, logic functions 
-//and other software and tools, and any partner logic 
-//functions, and any output files from any of the foregoing 
-//(including device programming or simulation files), and any 
-//associated documentation or information are expressly subject 
-//to the terms and conditions of the Intel Program License 
-//Subscription Agreement, the Intel Quartus Prime License Agreement,
-//the Intel FPGA IP License Agreement, or other applicable license
-//agreement, including, without limitation, that your use is for
-//the sole purpose of programming logic devices manufactured by
-//Intel and sold by Intel or its authorized distributors.  Please
-//refer to the applicable agreement for further details, at
-//https://fpgasoftware.intel.com/eula.
-
-
-// synopsys translate_off
-`timescale 1 ps / 1 ps
-// synopsys translate_on
-module fpu_add (aclr, clock, input_a, input_b, nan, overflow,
-	result, underflow, zero);
-
-	input	  aclr;
-	input	  clock;
-	input	[31:0]  input_a;
-	input	[31:0]  input_b;
-	output	  nan;
-	output	  overflow;
-	output	[31:0]  result;
-	output	  underflow;
-	output	  zero;
-
-	wire  sub_wire0;
-	wire  sub_wire1;
-	wire [31:0] sub_wire2;
-	wire  sub_wire3;
-	wire  sub_wire4;
-	wire  nan = sub_wire0;
-	wire  overflow = sub_wire1;
-	wire [31:0] result = sub_wire2[31:0];
-	wire  underflow = sub_wire3;
-	wire  zero = sub_wire4;
-
-	altfp_add_sub	altfp_add_sub_component (
-				.aclr (aclr),
-				.clock (clock),
-				.dataa (input_a),
-				.datab (input_b),
-				.nan (sub_wire0),
-				.overflow (sub_wire1),
-				.result (sub_wire2),
-				.underflow (sub_wire3),
-				.zero (sub_wire4));
-	defparam
-		altfp_add_sub_component.denormal_support = "NO",
-		altfp_add_sub_component.direction = "ADD",
-		altfp_add_sub_component.optimize = "SPEED",
-		altfp_add_sub_component.pipeline = 14,
-		altfp_add_sub_component.reduced_functionality = "NO",
-		altfp_add_sub_component.width_exp = 8,
-		altfp_add_sub_component.width_man = 23;
-
-endmodule
-
-// ============================================================
-// CNX file retrieval info
-// ============================================================
-// Retrieval info: PRIVATE: FPM_FORMAT NUMERIC "0"
-// Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Cyclone IV E"
-// Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX STRING "0"
-// Retrieval info: PRIVATE: WIDTH_DATA NUMERIC "32"
-// Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all
-// Retrieval info: CONSTANT: DENORMAL_SUPPORT STRING "NO"
-// Retrieval info: CONSTANT: DIRECTION STRING "ADD"
-// Retrieval info: CONSTANT: OPTIMIZE STRING "SPEED"
-// Retrieval info: CONSTANT: PIPELINE NUMERIC "14"
-// Retrieval info: CONSTANT: REDUCED_FUNCTIONALITY STRING "NO"
-// Retrieval info: CONSTANT: WIDTH_EXP NUMERIC "8"
-// Retrieval info: CONSTANT: WIDTH_MAN NUMERIC "23"
-// Retrieval info: USED_PORT: aclr 0 0 0 0 INPUT NODEFVAL "aclr"
-// Retrieval info: USED_PORT: clock 0 0 0 0 INPUT NODEFVAL "clock"
-// Retrieval info: USED_PORT: dataa 0 0 32 0 INPUT NODEFVAL "dataa[31..0]"
-// Retrieval info: USED_PORT: datab 0 0 32 0 INPUT NODEFVAL "datab[31..0]"
-// Retrieval info: USED_PORT: nan 0 0 0 0 OUTPUT NODEFVAL "nan"
-// Retrieval info: USED_PORT: overflow 0 0 0 0 OUTPUT NODEFVAL "overflow"
-// Retrieval info: USED_PORT: result 0 0 32 0 OUTPUT NODEFVAL "result[31..0]"
-// Retrieval info: USED_PORT: underflow 0 0 0 0 OUTPUT NODEFVAL "underflow"
-// Retrieval info: USED_PORT: zero 0 0 0 0 OUTPUT NODEFVAL "zero"
-// Retrieval info: CONNECT: @aclr 0 0 0 0 aclr 0 0 0 0
-// Retrieval info: CONNECT: @clock 0 0 0 0 clock 0 0 0 0
-// Retrieval info: CONNECT: @dataa 0 0 32 0 dataa 0 0 32 0
-// Retrieval info: CONNECT: @datab 0 0 32 0 datab 0 0 32 0
-// Retrieval info: CONNECT: nan 0 0 0 0 @nan 0 0 0 0
-// Retrieval info: CONNECT: overflow 0 0 0 0 @overflow 0 0 0 0
-// Retrieval info: CONNECT: result 0 0 32 0 @result 0 0 32 0
-// Retrieval info: CONNECT: underflow 0 0 0 0 @underflow 0 0 0 0
-// Retrieval info: CONNECT: zero 0 0 0 0 @zero 0 0 0 0
-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_add.v TRUE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_add.inc FALSE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_add.cmp FALSE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_add.bsf FALSE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_add_inst.v FALSE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_add_bb.v FALSE

+ 0 - 4
src/fpu32/fpu_div.qip

@@ -1,4 +0,0 @@
-set_global_assignment -name IP_TOOL_NAME "ALTFP_DIV"
-set_global_assignment -name IP_TOOL_VERSION "20.1"
-set_global_assignment -name IP_GENERATED_DEVICE_FAMILY "{Cyclone IV E}"
-set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "fpu_div.v"]

+ 0 - 135
src/fpu32/fpu_div.v

@@ -1,135 +0,0 @@
-// megafunction wizard: %ALTFP_DIV%
-// GENERATION: STANDARD
-// VERSION: WM1.0
-// MODULE: altfp_div 
-
-// ============================================================
-// File Name: fpu_div.v
-// Megafunction Name(s):
-// 			altfp_div
-//
-// Simulation Library Files(s):
-// 			
-// ============================================================
-// ************************************************************
-// THIS IS A WIZARD-GENERATED FILE. DO NOT EDIT THIS FILE!
-//
-// 20.1.0 Build 711 06/05/2020 SJ Lite Edition
-// ************************************************************
-
-
-//Copyright (C) 2020  Intel Corporation. All rights reserved.
-//Your use of Intel Corporation's design tools, logic functions 
-//and other software and tools, and any partner logic 
-//functions, and any output files from any of the foregoing 
-//(including device programming or simulation files), and any 
-//associated documentation or information are expressly subject 
-//to the terms and conditions of the Intel Program License 
-//Subscription Agreement, the Intel Quartus Prime License Agreement,
-//the Intel FPGA IP License Agreement, or other applicable license
-//agreement, including, without limitation, that your use is for
-//the sole purpose of programming logic devices manufactured by
-//Intel and sold by Intel or its authorized distributors.  Please
-//refer to the applicable agreement for further details, at
-//https://fpgasoftware.intel.com/eula.
-
-
-// synopsys translate_off
-`timescale 1 ps / 1 ps
-// synopsys translate_on
-module fpu_div (
-	aclr,
-	clock,
-	input_a,
-	input_b,
-	division_by_zero,
-	nan,
-	overflow,
-	result,
-	underflow,
-	zero);
-
-	input	  aclr;
-	input	  clock;
-	input	[31:0]  input_a;
-	input	[31:0]  input_b;
-	output	  division_by_zero;
-	output	  nan;
-	output	  overflow;
-	output	[31:0]  result;
-	output	  underflow;
-	output	  zero;
-
-	wire  sub_wire0;
-	wire  sub_wire1;
-	wire  sub_wire2;
-	wire [31:0] sub_wire3;
-	wire  sub_wire4;
-	wire  sub_wire5;
-	wire  division_by_zero = sub_wire0;
-	wire  nan = sub_wire1;
-	wire  overflow = sub_wire2;
-	wire [31:0] result = sub_wire3[31:0];
-	wire  underflow = sub_wire4;
-	wire  zero = sub_wire5;
-
-	altfp_div	altfp_div_component (
-				.aclr (aclr),
-				.clock (clock),
-				.dataa (input_a),
-				.datab (input_b),
-				.division_by_zero (sub_wire0),
-				.nan (sub_wire1),
-				.overflow (sub_wire2),
-				.result (sub_wire3),
-				.underflow (sub_wire4),
-				.zero (sub_wire5));
-	defparam
-		altfp_div_component.denormal_support = "NO",
-		altfp_div_component.optimize = "SPEED",
-		altfp_div_component.pipeline = 14,
-		altfp_div_component.reduced_functionality = "NO",
-		altfp_div_component.width_exp = 8,
-		altfp_div_component.width_man = 23;
-
-
-endmodule
-
-// ============================================================
-// CNX file retrieval info
-// ============================================================
-// Retrieval info: PRIVATE: INTENDED_DEVICE_FAMILY STRING "Cyclone IV E"
-// Retrieval info: PRIVATE: SYNTH_WRAPPER_GEN_POSTFIX STRING "0"
-// Retrieval info: LIBRARY: altera_mf altera_mf.altera_mf_components.all
-// Retrieval info: CONSTANT: DENORMAL_SUPPORT STRING "NO"
-// Retrieval info: CONSTANT: OPTIMIZE STRING "SPEED"
-// Retrieval info: CONSTANT: PIPELINE NUMERIC "14"
-// Retrieval info: CONSTANT: REDUCED_FUNCTIONALITY STRING "NO"
-// Retrieval info: CONSTANT: WIDTH_EXP NUMERIC "8"
-// Retrieval info: CONSTANT: WIDTH_MAN NUMERIC "23"
-// Retrieval info: USED_PORT: aclr 0 0 0 0 INPUT NODEFVAL "aclr"
-// Retrieval info: USED_PORT: clock 0 0 0 0 INPUT NODEFVAL "clock"
-// Retrieval info: USED_PORT: dataa 0 0 32 0 INPUT NODEFVAL "dataa[31..0]"
-// Retrieval info: USED_PORT: datab 0 0 32 0 INPUT NODEFVAL "datab[31..0]"
-// Retrieval info: USED_PORT: division_by_zero 0 0 0 0 OUTPUT NODEFVAL "division_by_zero"
-// Retrieval info: USED_PORT: nan 0 0 0 0 OUTPUT NODEFVAL "nan"
-// Retrieval info: USED_PORT: overflow 0 0 0 0 OUTPUT NODEFVAL "overflow"
-// Retrieval info: USED_PORT: result 0 0 32 0 OUTPUT NODEFVAL "result[31..0]"
-// Retrieval info: USED_PORT: underflow 0 0 0 0 OUTPUT NODEFVAL "underflow"
-// Retrieval info: USED_PORT: zero 0 0 0 0 OUTPUT NODEFVAL "zero"
-// Retrieval info: CONNECT: @aclr 0 0 0 0 aclr 0 0 0 0
-// Retrieval info: CONNECT: @clock 0 0 0 0 clock 0 0 0 0
-// Retrieval info: CONNECT: @dataa 0 0 32 0 dataa 0 0 32 0
-// Retrieval info: CONNECT: @datab 0 0 32 0 datab 0 0 32 0
-// Retrieval info: CONNECT: division_by_zero 0 0 0 0 @division_by_zero 0 0 0 0
-// Retrieval info: CONNECT: nan 0 0 0 0 @nan 0 0 0 0
-// Retrieval info: CONNECT: overflow 0 0 0 0 @overflow 0 0 0 0
-// Retrieval info: CONNECT: result 0 0 32 0 @result 0 0 32 0
-// Retrieval info: CONNECT: underflow 0 0 0 0 @underflow 0 0 0 0
-// Retrieval info: CONNECT: zero 0 0 0 0 @zero 0 0 0 0
-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_div.v TRUE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_div.inc FALSE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_div.cmp FALSE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_div.bsf FALSE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_div_inst.v FALSE
-// Retrieval info: GEN_FILE: TYPE_NORMAL fpu_div_bb.v FALSE

+ 0 - 5
src/fpu32/fpu_mul.qip

@@ -1,5 +0,0 @@
-set_global_assignment -name IP_TOOL_NAME "ALTFP_MULT"
-set_global_assignment -name IP_TOOL_VERSION "18.1"
-set_global_assignment -name IP_GENERATED_DEVICE_FAMILY "{Cyclone IV E}"
-set_global_assignment -name VERILOG_FILE [file join $::quartus(qip_path) "fpu_mul.v"]
-set_global_assignment -name MISC_FILE [file join $::quartus(qip_path) "fpu_mul.cmp"]

Разлика између датотеке није приказан због своје велике величине
+ 0 - 943
src/fpu32/fpu_mul.v


+ 276 - 0
src/fpu32/mult.v

@@ -0,0 +1,276 @@
+//IEEE Floating Point Multiplier (Single Precision)
+//Copyright (C) Jonathan P Dawson 2013
+//2013-12-12
+
+module multiplier( // Multi-cycle FSM: latches input_a, then input_b, via stb/ack handshakes and presents their single-precision product on output_z
+        input_a,
+        input_b,
+        input_a_stb,
+        input_b_stb,
+        output_z_ack,
+        clk,
+        rst,
+        output_z,
+        output_z_stb,
+        input_a_ack,
+        input_b_ack);
+
+  input     clk; // every register below is updated on the rising edge
+  input     rst; // synchronous reset: evaluated inside the clocked block, after the case statement
+
+  input     [31:0] input_a; // first operand word
+  input     input_a_stb; // operand a is captured on a cycle where input_a_stb and input_a_ack are both high
+  output    input_a_ack; // driven from s_input_a_ack
+
+  input     [31:0] input_b; // second operand word
+  input     input_b_stb; // operand b is captured on a cycle where input_b_stb and input_b_ack are both high
+  output    input_b_ack; // driven from s_input_b_ack
+
+  output    [31:0] output_z; // result word, driven from s_output_z
+  output    output_z_stb; // driven from s_output_z_stb
+  input     output_z_ack; // result hand-over completes when output_z_stb and output_z_ack are both high
+
+  reg       s_output_z_stb;
+  reg       [31:0] s_output_z;
+  reg       s_input_a_ack;
+  reg       s_input_b_ack;
+
+  reg       [3:0] state; // current FSM state; only codes 0..12 are defined below
+  parameter get_a         = 4'd0,
+            get_b         = 4'd1,
+            unpack        = 4'd2,
+            special_cases = 4'd3,
+            normalise_a   = 4'd4,
+            normalise_b   = 4'd5,
+            multiply_0    = 4'd6,
+            multiply_1    = 4'd7,
+            normalise_1   = 4'd8,
+            normalise_2   = 4'd9,
+            round         = 4'd10,
+            pack          = 4'd11,
+            put_z         = 4'd12;
+
+  reg       [31:0] a, b, z; // latched operand words and the assembled result word
+  reg       [23:0] a_m, b_m, z_m; // mantissas; bit 23 is the leading (hidden) one position
+  reg       [9:0] a_e, b_e, z_e; // unbiased exponents, 10 bits wide and read through $signed()
+  reg       a_s, b_s, z_s; // sign bits
+  reg       guard, round_bit, sticky; // bits below z_m[0] that feed the rounding decision
+  reg       [49:0] product; // a_m * b_m shifted left by two bit positions
+
+  always @(posedge clk)
+  begin
+
+    case(state) // NOTE(review): no default branch, so state codes 13..15 are not handled here; only rst changes state from them
+
+      get_a: // raise the ack, then latch input_a once the strobe is seen while ack is already high
+      begin
+        s_input_a_ack <= 1;
+        if (s_input_a_ack && input_a_stb) begin
+          a <= input_a;
+          s_input_a_ack <= 0; // later non-blocking assignment wins over the "<= 1" above
+          state <= get_b;
+        end
+      end
+
+      get_b: // same handshake as get_a, for the second operand
+      begin
+        s_input_b_ack <= 1;
+        if (s_input_b_ack && input_b_stb) begin
+          b <= input_b;
+          s_input_b_ack <= 0; // later non-blocking assignment wins over the "<= 1" above
+          state <= unpack;
+        end
+      end
+
+      unpack: // split both operand words into sign, exponent and fraction fields
+      begin
+        a_m <= a[22 : 0]; // 23-bit fraction into a 24-bit register, so bit 23 is 0 here
+        b_m <= b[22 : 0];
+        a_e <= a[30 : 23] - 127; // remove the bias: field 0 becomes -127, field 255 becomes 128
+        b_e <= b[30 : 23] - 127;
+        a_s <= a[31];
+        b_s <= b[31];
+        state <= special_cases;
+      end
+
+      special_cases: // NaN, infinity and zero operands bypass the multiply and go straight to put_z
+      begin
+        //if a is NaN or b is NaN return NaN
+        if ((a_e == 128 && a_m != 0) || (b_e == 128 && b_m != 0)) begin
+          z[31] <= 1; // the NaN produced here always has sign 1, exponent 255 and only fraction bit 22 set
+          z[30:23] <= 255;
+          z[22] <= 1;
+          z[21:0] <= 0;
+          state <= put_z;
+        //if a is inf return inf
+        end else if (a_e == 128) begin
+          z[31] <= a_s ^ b_s;
+          z[30:23] <= 255;
+          z[22:0] <= 0;
+          //if b is zero return NaN
+          if (($signed(b_e) == -127) && (b_m == 0)) begin // these assignments override the infinity written just above
+            z[31] <= 1;
+            z[30:23] <= 255;
+            z[22] <= 1;
+            z[21:0] <= 0;
+          end
+          state <= put_z;
+        //if b is inf return inf
+        end else if (b_e == 128) begin
+          z[31] <= a_s ^ b_s;
+          z[30:23] <= 255;
+          z[22:0] <= 0;
+          //if a is zero return NaN
+          if (($signed(a_e) == -127) && (a_m == 0)) begin // these assignments override the infinity written just above
+            z[31] <= 1;
+            z[30:23] <= 255;
+            z[22] <= 1;
+            z[21:0] <= 0;
+          end
+          state <= put_z;
+        //if a is zero return zero
+        end else if (($signed(a_e) == -127) && (a_m == 0)) begin
+          z[31] <= a_s ^ b_s; // zero result keeps the XOR of the operand signs
+          z[30:23] <= 0;
+          z[22:0] <= 0;
+          state <= put_z;
+        //if b is zero return zero
+        end else if (($signed(b_e) == -127) && (b_m == 0)) begin
+          z[31] <= a_s ^ b_s;
+          z[30:23] <= 0;
+          z[22:0] <= 0;
+          state <= put_z;
+        end else begin
+          //Denormalised Number
+          if ($signed(a_e) == -127) begin
+            a_e <= -126; // exponent field 0 with non-zero fraction: use exponent -126 and leave bit 23 clear
+          end else begin
+            a_m[23] <= 1; // otherwise set the leading one that the fraction field does not store
+          end
+          //Denormalised Number
+          if ($signed(b_e) == -127) begin
+            b_e <= -126;
+          end else begin
+            b_m[23] <= 1;
+          end
+          state <= normalise_a;
+        end
+      end
+
+      normalise_a: // one left shift per clock until a_m[23] is set
+      begin
+        if (a_m[23]) begin
+          state <= normalise_b;
+        end else begin
+          a_m <= a_m << 1;
+          a_e <= a_e - 1; // each left shift is compensated by decrementing the exponent
+        end
+      end
+
+      normalise_b: // one left shift per clock until b_m[23] is set
+      begin
+        if (b_m[23]) begin
+          state <= multiply_0;
+        end else begin
+          b_m <= b_m << 1;
+          b_e <= b_e - 1;
+        end
+      end
+
+      multiply_0: // compute sign, exponent and the raw mantissa product
+      begin
+        z_s <= a_s ^ b_s;
+        z_e <= a_e + b_e + 1; // the +1 pairs with taking product[49:26] as the mantissa; normalise_1 undoes it when bit 23 ends up clear
+        product <= a_m * b_m * 4; // 24x24-bit product placed in bits 49:2; bits 1:0 are zero
+        state <= multiply_1;
+      end
+
+      multiply_1: // slice the product into the result mantissa and the rounding bits
+      begin
+        z_m <= product[49:26];
+        guard <= product[25];
+        round_bit <= product[24];
+        sticky <= (product[23:0] != 0); // set when any of the remaining low bits is non-zero
+        state <= normalise_1;
+      end
+
+      normalise_1: // shift left, one bit per clock, until z_m[23] is set
+      begin
+        if (z_m[23] == 0) begin
+          z_e <= z_e - 1;
+          z_m <= z_m << 1;
+          z_m[0] <= guard; // overrides bit 0 of the shifted value so the guard bit moves into the mantissa
+          guard <= round_bit;
+          round_bit <= 0;
+        end else begin
+          state <= normalise_2;
+        end
+      end
+
+      normalise_2: // shift right, one bit per clock, while the exponent is below -126
+      begin
+        if ($signed(z_e) < -126) begin
+          z_e <= z_e + 1;
+          z_m <= z_m >> 1;
+          guard <= z_m[0]; // bits shifted out of the mantissa cascade through guard and round_bit
+          round_bit <= guard;
+          sticky <= sticky | round_bit; // sticky accumulates every bit that falls off the end
+        end else begin
+          state <= round;
+        end
+      end
+
+      round: // round to nearest, ties to even, using guard / round_bit / sticky
+      begin
+        if (guard && (round_bit | sticky | z_m[0])) begin // round up when above half, or exactly half with an odd mantissa LSB
+          z_m <= z_m + 1;
+          if (z_m == 24'hffffff) begin // the increment wraps z_m to 0, so the exponent is bumped instead
+            z_e <=z_e + 1;
+          end
+        end
+        state <= pack;
+      end
+
+      pack: // assemble the result word; the conditional assignments below override the defaults when they apply
+      begin
+        z[22 : 0] <= z_m[22:0]; // bit 23 of the mantissa is not stored in the result word
+        z[30 : 23] <= z_e[7:0] + 127; // re-apply the bias of 127
+        z[31] <= z_s;
+        if ($signed(z_e) == -126 && z_m[23] == 0) begin // no leading one at the minimum exponent: exponent field is written as 0
+          z[30 : 23] <= 0;
+        end
+        //if overflow occurs, return inf
+        if ($signed(z_e) > 127) begin
+          z[22 : 0] <= 0;
+          z[30 : 23] <= 255;
+          z[31] <= z_s;
+        end
+        state <= put_z;
+      end
+
+      put_z: // present z with the strobe raised and wait for the consumer's ack
+      begin
+        s_output_z_stb <= 1;
+        s_output_z <= z;
+        if (s_output_z_stb && output_z_ack) begin
+          s_output_z_stb <= 0; // later non-blocking assignment wins over the "<= 1" above
+          state <= get_a;
+        end
+      end
+
+    endcase
+
+    if (rst == 1) begin // placed after the case so these assignments override anything scheduled above
+      state <= get_a;
+      s_input_a_ack <= 0;
+      s_input_b_ack <= 0;
+      s_output_z_stb <= 0; // only state and the three handshake registers are reset; data registers keep their values
+    end
+
+  end
+  assign input_a_ack = s_input_a_ack;
+  assign input_b_ack = s_input_b_ack;
+  assign output_z_stb = s_output_z_stb;
+  assign output_z = s_output_z;
+
+endmodule

+ 2 - 2
src/root.sv

@@ -1,5 +1,3 @@
-
-
 // synopsys translate_off
 `timescale 1 ps / 1 ps
 // synopsys translate_on
@@ -43,6 +41,8 @@ module root_tb ();
 
         #60ps;
         KEYS = 2'b11; // Release keys
+        #7000ps;
+        $finish();
     end
 
 endmodule : root_tb