2 次代码提交 68a6cc6eb6 ... df9f091caa

作者 SHA1 备注 提交日期
  Min df9f091caa Merge remote-tracking branch 'origin/master' 4 年之前
  Min b0b9813d96 Pipelined nerual model WIP 4 年之前
共有 5 个文件被更改，包括 273 次插入和 2 次删除
  1. 54 0
      src/neural/comp.sv
  2. 80 1
      src/neural/layer.sv
  3. 42 1
      src/neural/neural.sv
  4. 43 0
      src/neural/neuron.sv
  5. 54 0
      src/neural/sigmoid.sv

+ 54 - 0
src/neural/comp.sv

@@ -61,6 +61,60 @@ x size: 2**K
 left io size: 2**K
 */
 
+// Synchronous adder tree: reduces the 2**K operands in x to a single sum y
+// using K levels of two-input neural_adder instances (2**K - 1 adders total).
+//
+// Parameters:
+//   K - log2 of the number of operands; must be >= 1.
+//   N - operand width in bits.
+//       NOTE(review): neural_adder is instantiated without a width parameter,
+//       so values of N other than its port width are probably unsupported.
+// Ports:
+//   clk, rst - forwarded unchanged to every neural_adder.
+//   x        - 2**K operands.
+//   y        - output of the final adder.
+//
+// Intermediate results live in layer_w, level after level: the first level
+// fills layer_w[0 .. 2**(K-1)-1], the next level the 2**(K-2) entries after
+// that, and so on. The last level writes y directly.
+module adder_casc_sync#(parameter K,N=32)(clk, rst, x, y);
+    input logic clk;
+    input logic rst;
+    input wire [N-1:0] x [2**K-1:0];
+    output logic [N-1:0] y;
+
+    // All levels except the last write to layer_w: 2**K - 2 nets for K >= 2.
+    // At least one element is kept so the range stays well-formed for K == 1.
+    localparam WIRES = (K > 1) ? 2**K - 2 : 1;
+    wire [N-1:0] layer_w [WIRES-1:0];
+
+    genvar i,j;
+    generate
+        for(i=0; i<K; i++) begin : generate_layers
+            // Degenerate tree (K == 1): the only level is both first and last,
+            // so a single adder connects the two inputs straight to y.
+            if(K == 1) begin
+                neural_adder c(
+                    .clk(clk),
+                    .rst(rst),
+                    .x0(x[0]),
+                    .x1(x[1]),
+                    .y(y)
+                );
+            end
+            // First layer: pairs up the module inputs
+            else if(i == 0) begin
+                for(j=0; j<2**(K-1); j++) begin : gen_first_layer
+                    neural_adder a(
+                        .clk(clk),
+                        .rst(rst),
+                        .x0(x[j*2]),
+                        .x1(x[j*2+1]),
+                        .y(layer_w[j])
+                    );
+                end
+            end
+            // Last layer: adds the final two intermediate results
+            else if((K-i) <= 1) begin
+                localparam s0 = 2**K-4;
+                localparam s1 = s0+1;
+                neural_adder c(
+                    .clk(clk),
+                    .rst(rst),
+                    .x0(layer_w[s0]),
+                    .x1(layer_w[s1]),
+                    .y(y)
+                );
+            end
+            // Middle layer
+            else begin
+                for(j=0; j<2**(K-i-1); j++) begin : gen_mid_layer
+                    // Pair number across all middle layers so far. Exact integer
+                    // form of 2**(K-1) * (1 - 2**-(i-1)) + j, which keeps every
+                    // array index integral.
+                    localparam s = 2**(K-1) - 2**(K-i) + j;
+                    localparam ix = s*2;
+                    localparam ix1 = s*2+1;
+                    localparam iy = s+2**(K-1);
+                    neural_adder b(
+                        .clk(clk),
+                        .rst(rst),
+                        .x0(layer_w[ix]),
+                        .x1(layer_w[ix1]),
+                        .y(layer_w[iy])
+                    );
+                end
+            end
+        end
+    endgenerate
+endmodule : adder_casc_sync
+
 module adder_casc#(parameter K,N=32)(clk, rst, x, y, left, right);
     input logic clk;
     input logic rst;

+ 80 - 1
src/neural/layer.sv

@@ -47,6 +47,31 @@ module neuron_layer#(parameter C, K, N=32)(clk, rst, x, y, w, b, left, right);
 
 endmodule : neuron_layer
 
+// Synchronous neuron layer: instantiates 2**K neuron_sync units that all read
+// the same 2**C inputs. Neuron i gets weight row w[i] and bias b[i], and
+// drives y[i].
+//
+// Parameters:
+//   C - log2 of the number of inputs per neuron.
+//   K - log2 of the number of neurons in the layer.
+//   N - word width in bits, forwarded to each neuron_sync.
+module neuron_layer_sync#(parameter C, K, N=32)(clk, rst, x, y, w, b);
+    localparam CONNS = 2**C;
+    localparam NEURONS = 2**K;
+
+    input wire clk;
+    input wire rst;
+    input wire [N-1:0] x [CONNS-1:0];
+    input wire [N-1:0] w [NEURONS-1:0][CONNS-1:0];
+    input wire [N-1:0] b [NEURONS-1:0];
+    output wire [N-1:0] y [NEURONS-1:0];
+
+    genvar n_idx;
+    generate
+        // One neuron per output element; the input vector x is shared.
+        for(n_idx=0; n_idx<NEURONS; n_idx=n_idx+1) begin: gen_neruons
+            neuron_sync#(.N(N), .K(C)) n(
+                .x(x),
+                .w(w[n_idx]),
+                .b(b[n_idx]),
+                .y(y[n_idx]),
+                .clk(clk),
+                .rst(rst)
+            );
+        end
+    endgenerate
+endmodule : neuron_layer_sync
+
 /*
 Testbench for a 8x8x2 neuron network as shown below:
 
@@ -137,6 +162,55 @@ module neuron_network_tb;
         .right(right[1])
     );
 
+    /* ******************
+    Synchronous network
+
+    Three neuron_layer_sync stages chained output-to-input, followed by one
+    hard_sigmoid_sync per final-layer output. Uses the same x, weight and
+    bias signals as the network instantiated above.
+    ********************/
+    reg [31:0] layer1_s [7:0];
+    reg [31:0] layer2_s [7:0];
+    reg [31:0] layer3_s [1:0];
+    reg [31:0] ys [1:0];
+
+    // Layer 1: 2**2 inputs, 2**3 neurons
+    neuron_layer_sync#(.C(2), .K(3)) layer_s1(
+        .clk(clk),
+        .rst(rst),
+        .x(x),
+        .y(layer1_s),
+        .w(layer1_w),
+        .b(layer1_b)
+    );
+
+    // Layer 2: 2**3 inputs, 2**3 neurons
+    neuron_layer_sync#(.C(3), .K(3)) layer_s2(
+        .clk(clk),
+        .rst(rst),
+        .x(layer1_s),
+        .y(layer2_s),
+        .w(layer2_w),
+        .b(layer2_b)
+    );
+
+    // Layer 3: 2**3 inputs, 2**1 neurons
+    neuron_layer_sync#(.C(3), .K(1)) layer_s3(
+        .clk(clk),
+        .rst(rst),
+        .x(layer2_s),
+        .y(layer3_s),
+        .w(layer3_w),
+        .b(layer3_b)
+    );
+
+    hard_sigmoid_sync sigmoid_s0(
+        .clk(clk),
+        .rst(rst),
+        .x(layer3_s[0]),
+        .y(ys[0])
+    );
+
+    // Reads the synchronous layer output (layer3_s), matching sigmoid_s0.
+    hard_sigmoid_sync sigmoid_s1(
+        .clk(clk),
+        .rst(rst),
+        .x(layer3_s[1]),
+        .y(ys[1])
+    );
+
     initial forever #5 clk = ~clk;
 
     initial begin
@@ -207,4 +281,9 @@ module neuron_network_tb;
     endtask : read_value
 
 
-endmodule : neuron_network_tb
+endmodule : neuron_network_tb
+
+
+// Placeholder testbench: declared with an empty body, no signals or instances yet.
+module neuron_network2_tb;
+
+endmodule : neuron_network2_tb

+ 42 - 1
src/neural/neural.sv

@@ -1,4 +1,45 @@
 `include "comp.sv"
 `include "sigmoid.sv"
 `include "neuron.sv"
-`include "layer.sv"
+`include "layer.sv"
+
+
+// Clocked two-operand adder used by the neural modules.
+// Wraps floating_add (parameters N=32, M=8) and exposes only its sum output.
+//   clk, rst - forwarded to floating_add as clk / reset
+//   x0, x1   - 32-bit operands
+//   y        - 32-bit sum
+module neural_adder(clk, rst, x0, x1, y);
+    input clk;
+    input rst;
+    input [31:0] x0;
+    input [31:0] x1;
+    output [31:0] y;
+
+    // The diff output of floating_add is deliberately left unconnected.
+    floating_add#(.M(8), .N(32)) add0(
+        .clk(clk),
+        .reset(rst),
+        .input_1(x0),
+        .input_2(x1),
+        .sum(y),
+        .diff()
+    );
+endmodule : neural_adder
+
+
+// Clocked two-operand multiplier used by the neural modules.
+// Wraps floating_product (parameters N=32, M=8).
+//   clk, rst - forwarded to floating_product as clk / reset
+//   x0, x1   - 32-bit operands
+//   y        - 32-bit product
+module neural_mult(clk, rst, x0, x1, y);
+    input clk;
+    input rst;
+    input [31:0] x0;
+    input [31:0] x1;
+    output [31:0] y;
+
+    floating_product#(.M(8), .N(32)) mul0(
+        .clk(clk),
+        .reset(rst),
+        .input_1(x0),
+        .input_2(x1),
+        .product(y)
+    );
+endmodule : neural_mult
+
+// Greater-than comparator wrapper around fpu32_gt.
+//   x0, x1 - 32-bit operands, passed positionally to fpu32_gt
+//   y      - 1-bit comparison result from fpu32_gt
+module neural_comp_gt(x0, x1, y);
+    input [31:0] x0, x1;
+    output y;
+    // The result must land on the module output y; an undeclared name here
+    // would create an implicit net and leave y undriven.
+    fpu32_gt gt0(x0, x1, y);
+endmodule : neural_comp_gt
+
+// Less-than comparator wrapper around fpu32_lt.
+//   x0, x1 - 32-bit operands, passed positionally to fpu32_lt
+//   y      - 1-bit comparison result from fpu32_lt
+module neural_comp_lt(x0, x1, y);
+    input [31:0] x0, x1;
+    output y;
+    // The result must land on the module output y; an undeclared name here
+    // would create an implicit net and leave y undriven.
+    fpu32_lt lt0(x0, x1, y);
+endmodule : neural_comp_lt

+ 43 - 0
src/neural/neuron.sv

@@ -17,6 +17,49 @@
 
 */
 
+// Synchronous neuron: y = b + sum over i of x[i] * w[i], for M = 2**K inputs.
+// Built from three stages: one neural_mult per input, an adder_casc_sync tree
+// over the products, and a final neural_adder that adds the bias.
+//
+// Parameters:
+//   K - log2 of the number of inputs.
+//   N - word width in bits.
+module neuron_sync#(parameter K, N=32)(clk, rst, x, y, w, b);
+    localparam M = 2**K;
+
+    input wire clk;
+    input wire rst;
+    input wire [N-1:0] x [M-1:0];
+    input wire [N-1:0] w [M-1:0];
+    input wire [N-1:0] b;
+    output logic [N-1:0] y;
+
+    wire [N-1:0] products [M-1:0]; // x[i] * w[i]
+    wire [N-1:0] weighted_sum;     // sum of all products
+
+    // Stage 1: multiply every input by its weight
+    genvar m;
+    generate
+        for(m=0; m<M; m=m+1) begin: gen_mult_layer
+            neural_mult mult(
+                .x0(x[m]),
+                .x1(w[m]),
+                .y(products[m]),
+                .clk(clk),
+                .rst(rst)
+            );
+        end
+    endgenerate
+
+    // Stage 2: reduce the products to a single sum
+    adder_casc_sync#(.N(N), .K(K)) adder0(
+        .x(products),
+        .y(weighted_sum),
+        .clk(clk),
+        .rst(rst)
+    );
+
+    // Stage 3: add the bias
+    neural_adder adder1 (
+        .x0(b),
+        .x1(weighted_sum),
+        .y(y),
+        .clk(clk),
+        .rst(rst)
+    );
+
+endmodule : neuron_sync
+
 module neuron#(parameter K, N=32)(x, y, w, b, ack, stb, right, clk, rst);
     localparam M = 2**K;
     input wire [N-1:0] x [M-1:0];

+ 54 - 0
src/neural/sigmoid.sv

@@ -123,6 +123,60 @@ module hard_sigmoid #(parameter N=32)(clk, rst, x, y, left, right);
 
 endmodule : hard_sigmoid
 
+// Synchronous hard sigmoid on a 32-bit float-encoded input:
+//   y = 0               when x is not greater than -2.5
+//   y = 0.2 * (x + 2.5) when -2.5 < x < 2.5
+//   y = 1.0             otherwise
+//
+// Ports:
+//   clk, rst - clock and synchronous reset (reset clears only the registered
+//              comparison flags; y is not reset)
+//   x        - input value
+//   y        - registered output
+//
+// NOTE(review): the comparison flags are delayed by one register stage, while
+// comp_result passes through neural_adder and neural_mult. Whether the two
+// paths line up depends on the latency of those modules — confirm.
+module hard_sigmoid_sync #(parameter N=32)(clk, rst, x, y);
+    input clk, rst;
+    input [N-1:0] x;
+    output logic [N-1:0] y;
+    logic [N-1:0] value, comp_result;
+
+    logic gt_neg0, gt_neg1;
+    logic lt_pos0, lt_pos1;
+    wire [N-1:0] join_value;
+
+    // Feed the module input into the datapath; without this the adder and
+    // both comparators would read an undriven signal.
+    assign value = x;
+
+    // Add +2.5
+    neural_adder add0(
+        .clk(clk),
+        .rst(rst),
+        .x0('h40200000),
+        .x1(value),
+        .y(join_value)
+    );
+
+    // Multiply by 0.2
+    neural_mult mult0(
+        .clk(clk),
+        .rst(rst),
+        .x0('h3e4ccccd),
+        .x1(join_value),
+        .y(comp_result)
+    );
+
+    neural_comp_gt gt0(value, 'hc0200000, gt_neg0); // more than -2.5
+    neural_comp_lt lt0(value, 'h40200000, lt_pos0); // less than +2.5
+
+    always_ff @(posedge clk) begin
+        // Register the comparison results; the output selection below uses
+        // the values captured on the previous clock edge.
+        gt_neg1 <= gt_neg0;
+        lt_pos1 <= lt_pos0;
+        // not above -2.5: output 0
+        if(~gt_neg1) begin
+            y <= 0;
+        end
+        // if in between -2.5 and 2.5
+        else if(gt_neg1 & lt_pos1) begin
+            y <= comp_result;
+        end
+        // if more than 2.5 output 1
+        else begin
+            y <= 'h3f800000;
+        end
+
+        // Reset is listed last so it overrides the flag updates above.
+        if(rst) begin
+            gt_neg1 <= 0;
+            lt_pos1 <= 0;
+        end
+    end
+
+endmodule : hard_sigmoid_sync
+
 
 module hard_sigmoid_tb;
     reg rst, clk;