From 750347086b8e6217674f9a15d88aa4662b96b79f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20B=2E=20Preu=C3=9Fer?= Date: Tue, 16 Jul 2024 12:29:50 +0100 Subject: [PATCH] Align datatype extension capabilities between all DSP-based MVU implementations. --- finn-rtllib/mvu/mvu_4sx4u.sv | 10 +- finn-rtllib/mvu/mvu_8sx8u_dsp48.sv | 4 +- finn-rtllib/mvu/mvu_vvu_8sx9_dsp58.sv | 7 +- finn-rtllib/mvu/mvu_vvu_axi.sv | 23 ++-- finn-rtllib/mvu/tb/mvu_3sx3u_tb.sv | 165 ++++++++++++++++++++++++++ 5 files changed, 193 insertions(+), 16 deletions(-) create mode 100644 finn-rtllib/mvu/tb/mvu_3sx3u_tb.sv diff --git a/finn-rtllib/mvu/mvu_4sx4u.sv b/finn-rtllib/mvu/mvu_4sx4u.sv index ccb25380c8..1f6e97281e 100644 --- a/finn-rtllib/mvu/mvu_4sx4u.sv +++ b/finn-rtllib/mvu/mvu_4sx4u.sv @@ -34,6 +34,8 @@ module mvu_4sx4u #( int unsigned PE, int unsigned SIMD, + int unsigned WEIGHT_WIDTH, + int unsigned ACTIVATION_WIDTH, int unsigned ACCU_WIDTH, int unsigned VERSION = 1, // Version 1 (DSP48E1) *must* commit to NARROW_WEIGHTS @@ -49,8 +51,8 @@ module mvu_4sx4u #( // Input input logic last, input logic zero, // ignore current inputs and force this partial product to zero - input logic signed [PE-1:0][SIMD-1:0][3:0] w, // signed weights - input logic [SIMD-1:0][3:0] a, // unsigned activations (override by SIGNED_ACTIVATIONS) + input logic signed [PE-1:0][SIMD-1:0][WEIGHT_WIDTH -1:0] w, // signed weights + input logic [SIMD-1:0][ACTIVATION_WIDTH-1:0] a, // unsigned activations (override by SIGNED_ACTIVATIONS) // Ouput output logic vld, @@ -141,14 +143,14 @@ module mvu_4sx4u #( for(genvar s = 0; s < SIMD; s++) begin : genSIMD // Input Lane Assembly - uwire [17:0] bb = { {(14){SIGNED_ACTIVATIONS && a[s][3]}}, a[s] }; + uwire [17:0] bb = { {(18-ACTIVATION_WIDTH){SIGNED_ACTIVATIONS && a[s][ACTIVATION_WIDTH-1]}}, a[s] }; logic [29:0] aa; logic [26:0] dd; logic [ 1:0] xx[3:1]; if(1) begin : blkVectorize uwire signed [3:0] ww[PE_END - PE_BEG]; for(genvar pe = 0; pe < PE_END - PE_BEG; pe++) begin - 
assign ww[pe] = w[PE_BEG + pe][s]; + assign ww[pe] = $signed(w[PE_BEG + pe][s]); if(pe > 0) begin if(BEHAVIORAL) assign xx[pe + PE_REM] = zero? 0 : ww[pe] * a[s]; `ifndef VERILATOR diff --git a/finn-rtllib/mvu/mvu_8sx8u_dsp48.sv b/finn-rtllib/mvu/mvu_8sx8u_dsp48.sv index 414c4b0be0..107a00918e 100644 --- a/finn-rtllib/mvu/mvu_8sx8u_dsp48.sv +++ b/finn-rtllib/mvu/mvu_8sx8u_dsp48.sv @@ -34,9 +34,9 @@ module mvu_8sx8u_dsp48 #( int unsigned PE, int unsigned SIMD, - int unsigned ACCU_WIDTH, - int unsigned ACTIVATION_WIDTH, int unsigned WEIGHT_WIDTH, + int unsigned ACTIVATION_WIDTH, + int unsigned ACCU_WIDTH, int unsigned VERSION = 1, bit SIGNED_ACTIVATIONS = 0, diff --git a/finn-rtllib/mvu/mvu_vvu_8sx9_dsp58.sv b/finn-rtllib/mvu/mvu_vvu_8sx9_dsp58.sv index 3bbc7051b9..2734f37cf3 100644 --- a/finn-rtllib/mvu/mvu_vvu_8sx9_dsp58.sv +++ b/finn-rtllib/mvu/mvu_vvu_8sx9_dsp58.sv @@ -35,9 +35,10 @@ module mvu_vvu_8sx9_dsp58 #( bit IS_MVU, int unsigned PE, int unsigned SIMD, - int unsigned ACTIVATION_WIDTH, - int unsigned WEIGHT_WIDTH, - int unsigned ACCU_WIDTH, + int unsigned WEIGHT_WIDTH, + int unsigned ACTIVATION_WIDTH, + int unsigned ACCU_WIDTH, + bit SIGNED_ACTIVATIONS = 0, int unsigned SEGMENTLEN = 0, // Default to 0 (which implies a single segment) bit FORCE_BEHAVIORAL = 0, diff --git a/finn-rtllib/mvu/mvu_vvu_axi.sv b/finn-rtllib/mvu/mvu_vvu_axi.sv index 35325abdf9..0ee84b2f79 100644 --- a/finn-rtllib/mvu/mvu_vvu_axi.sv +++ b/finn-rtllib/mvu/mvu_vvu_axi.sv @@ -300,9 +300,13 @@ module mvu_vvu_axi #( case(COMPUTE_CORE) "mvu_vvu_8sx9_dsp58": - mvu_vvu_8sx9_dsp58 #(.IS_MVU(IS_MVU), .PE(PE), .SIMD(DSP_SIMD), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .WEIGHT_WIDTH(WEIGHT_WIDTH), - .ACCU_WIDTH(ACCU_WIDTH), .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .SEGMENTLEN(SEGMENTLEN), - .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)) core ( + mvu_vvu_8sx9_dsp58 #( + .IS_MVU(IS_MVU), + .PE(PE), .SIMD(DSP_SIMD), + .WEIGHT_WIDTH(WEIGHT_WIDTH), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .ACCU_WIDTH(ACCU_WIDTH), + 
.SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .SEGMENTLEN(SEGMENTLEN), + .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL) + ) core ( .clk(dsp_clk), .rst, .en(dsp_en), .last(dsp_last), .zero(dsp_zero), .w(dsp_w), .a(dsp_a), .vld(dsp_vld), .p(dsp_p) @@ -310,7 +314,8 @@ module mvu_vvu_axi #( "mvu_4sx4u_dsp48e1": mvu_4sx4u #( .PE(PE), .SIMD(DSP_SIMD), - .ACCU_WIDTH(ACCU_WIDTH), .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .NARROW_WEIGHTS(NARROW_WEIGHTS), + .WEIGHT_WIDTH(WEIGHT_WIDTH), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .ACCU_WIDTH(ACCU_WIDTH), + .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .NARROW_WEIGHTS(NARROW_WEIGHTS), .VERSION(1), .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL) ) core ( .clk(dsp_clk), .rst, .en(dsp_en), @@ -320,7 +325,8 @@ module mvu_vvu_axi #( "mvu_4sx4u_dsp48e2": mvu_4sx4u #( .PE(PE), .SIMD(DSP_SIMD), - .ACCU_WIDTH(ACCU_WIDTH), .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .NARROW_WEIGHTS(NARROW_WEIGHTS), + .WEIGHT_WIDTH(WEIGHT_WIDTH), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .ACCU_WIDTH(ACCU_WIDTH), + .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .NARROW_WEIGHTS(NARROW_WEIGHTS), .VERSION(2), .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL) ) core ( .clk(dsp_clk), .rst, .en(dsp_en), @@ -328,8 +334,11 @@ module mvu_vvu_axi #( .vld(dsp_vld), .p(dsp_p) ); "mvu_8sx8u_dsp48": - mvu_8sx8u_dsp48 #(.PE(PE), .SIMD(DSP_SIMD), .ACCU_WIDTH(ACCU_WIDTH), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .WEIGHT_WIDTH(WEIGHT_WIDTH), - .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL)) core ( + mvu_8sx8u_dsp48 #( + .PE(PE), .SIMD(DSP_SIMD), + .WEIGHT_WIDTH(WEIGHT_WIDTH), .ACTIVATION_WIDTH(ACTIVATION_WIDTH), .ACCU_WIDTH(ACCU_WIDTH), + .SIGNED_ACTIVATIONS(SIGNED_ACTIVATIONS), .FORCE_BEHAVIORAL(FORCE_BEHAVIORAL) + ) core ( .clk(dsp_clk), .rst, .en(dsp_en), .last(dsp_last), .zero(dsp_zero), .w(dsp_w), .a(dsp_a), .vld(dsp_vld), .p(dsp_p) diff --git a/finn-rtllib/mvu/tb/mvu_3sx3u_tb.sv b/finn-rtllib/mvu/tb/mvu_3sx3u_tb.sv new file mode 100644 index 0000000000..783218e08c --- /dev/null +++ 
b/finn-rtllib/mvu/tb/mvu_3sx3u_tb.sv
@@ -0,0 +1,185 @@
+// Self-checking testbench that drives 3-bit signed weights and 3-bit unsigned
+// activations through mvu_vvu_axi using the "mvu_4sx4u_dsp48e2" compute core.
+// Every output beat is compared against a reference accumulation that is
+// computed alongside the input feed.
+module mvu_3sx3u_tb;
+
+	// Number of input vectors fed to the DUT
+	localparam int unsigned ROUNDS = 157;
+
+	// Matrix dimensions and folding
+	localparam int unsigned MH = 32;
+	localparam int unsigned MW = 60;
+	localparam int unsigned PE = 1;
+	localparam int unsigned SIMD = 1;
+
+	// Operand and accumulator widths
+	localparam int unsigned ACTIVATION_WIDTH = 3;
+	localparam int unsigned WEIGHT_WIDTH = 3;
+	localparam int unsigned ACCU_WIDTH = 16;
+
+
+	//-----------------------------------------------------------------------
+	// Global Control
+	logic clk = 1;
+	always #5ns clk = !clk;
+
+	logic rst = 1;	// released after 16 rising clock edges
+	initial begin
+		repeat(16) @(posedge clk);
+		rst <= 0;
+	end
+
+	//-----------------------------------------------------------------------
+	// DUT
+	logic [PE-1:0][SIMD-1:0][WEIGHT_WIDTH-1:0] s_axis_weights_tdata;
+	logic s_axis_weights_tvalid;
+	uwire s_axis_weights_tready;
+
+	logic [SIMD-1:0][ACTIVATION_WIDTH-1:0] s_axis_input_tdata;
+	logic s_axis_input_tvalid;
+	uwire s_axis_input_tready;
+
+	uwire [PE-1:0][ACCU_WIDTH-1:0] m_axis_output_tdata;
+	uwire m_axis_output_tvalid;
+	logic m_axis_output_tready;
+
+	mvu_vvu_axi #(
+		.IS_MVU(1),
+		.COMPUTE_CORE("mvu_4sx4u_dsp48e2"),
+		.MH(MH), .MW(MW),
+		.PE(PE), .SIMD(SIMD),
+
+		.ACTIVATION_WIDTH(ACTIVATION_WIDTH),
+		.WEIGHT_WIDTH(WEIGHT_WIDTH),
+		.ACCU_WIDTH(ACCU_WIDTH)
+		// Not overridden by this testbench:
+		//int unsigned SEGMENTLEN = 0,
+		//bit FORCE_BEHAVIORAL = 0,
+	) dut (
+		.ap_clk(clk), .ap_clk2x('x), .ap_rst_n(!rst),
+		.s_axis_weights_tdata, .s_axis_weights_tvalid, .s_axis_weights_tready,
+		.s_axis_input_tdata, .s_axis_input_tvalid, .s_axis_input_tready,
+		.m_axis_output_tdata, .m_axis_output_tvalid, .m_axis_output_tready
+	);
+
+	//-----------------------------------------------------------------------
+	// Stimuli
+
+	//- Infinite Weight Feed ------------
+	typedef logic signed [WEIGHT_WIDTH-1:0] weights_t[MH][MW];
+
+	// Returns a randomized MH x MW weight matrix. The status returned by
+	// std::randomize() is checked so that a failed randomization stops the
+	// simulation instead of silently continuing with unrandomized weights.
+	function weights_t calc_WEIGHTS();
+		automatic weights_t ret;
+		if(!std::randomize(ret)) begin
+			$error("Weight randomization failed.");
+			$stop;
+		end
+		return ret;
+	endfunction : calc_WEIGHTS
+	weights_t WEIGHTS = calc_WEIGHTS();
+
+	// Replays WEIGHTS forever, one PE x SIMD tile per accepted beat
+	initial begin
+		s_axis_weights_tdata = 'x;
+		s_axis_weights_tvalid = 0;
+		@(posedge clk iff !rst);
+
+		forever begin
+			for(int unsigned h = 0; h < MH; h+=PE) begin
+				for(int unsigned w = 0; w < MW; w+=SIMD) begin
+					for(int unsigned pe = 0; pe < PE; pe++) begin
+						for(int unsigned simd = 0; simd < SIMD; simd++) begin
+							s_axis_weights_tdata[pe][simd] <= WEIGHTS[h+pe][w+simd];
+						end
+					end
+					s_axis_weights_tvalid <= 1;
+					@(posedge clk iff s_axis_weights_tready);
+					s_axis_weights_tvalid <= 0;
+					s_axis_weights_tdata <= 'x;
+				end
+			end
+		end
+	end
+
+	//- Input Feed and Reference Computation
+	typedef logic [PE-1:0][ACCU_WIDTH-1:0] outvec_t;
+	outvec_t Q_ref[$] = {};	// expected output beats, oldest first
+
+	initial begin
+		s_axis_input_tdata = 'x;
+		s_axis_input_tvalid = 0;
+		@(posedge clk iff !rst);
+
+		repeat(ROUNDS) begin : blkRounds
+			automatic logic [MH-1:0][ACCU_WIDTH-1:0] accus = '{ default: 0 };
+
+			for(int unsigned w = 0; w < MW; w+=SIMD) begin : blkSF
+				for(int unsigned simd = 0; simd < SIMD; simd++) begin : blkSIMD
+					automatic logic [ACTIVATION_WIDTH-1:0] act = $urandom();
+					for(int unsigned h = 0; h < MH; h++) begin : blkMH
+						// Prepend a zero bit so the unsigned activation enters the signed product as non-negative
+						automatic logic signed [ACCU_WIDTH-1:0] prod = WEIGHTS[h][w+simd] * $signed({1'b0, act});
+						accus[h] += prod;
+					end : blkMH
+					s_axis_input_tdata[simd] <= act;
+				end : blkSIMD
+				s_axis_input_tvalid <= 1;
+				@(posedge clk iff s_axis_input_tready);
+				s_axis_input_tvalid <= 0;
+				s_axis_input_tdata <= 'x;
+			end : blkSF
+
+			// Queue the expected results of this round in groups of PE rows
+			for(int unsigned h = 0; h < MH; h+=PE) begin
+				Q_ref.push_back(accus[h+:PE]);
+			end
+
+		end : blkRounds
+	end
+
+	//- Output Checker
+	initial begin
+		// Counts consecutive clock cycles without a valid output
+		automatic int timeout = 0;
+
+		m_axis_output_tready = 0;
+		@(posedge clk iff !rst);
+
+		m_axis_output_tready <= 1;
+		while(timeout < MW/SIMD+16) begin
+			@(posedge clk);
+			if(!m_axis_output_tvalid) timeout++;
+			else begin
+				automatic outvec_t exp;
+
+				assert(Q_ref.size()) else begin
+					$error("Spurious output.");
+					$stop;
+				end
+
+				exp = Q_ref.pop_front();
+				assert(m_axis_output_tdata === exp) else begin
+					$error("Mismatched output %p instead of %p.", m_axis_output_tdata, exp);
+					$stop;
+				end
+
+				timeout = 0;
+			end
+		end
+		m_axis_output_tready <= 0;
+
+		// Every queued reference must have been consumed by now
+		assert(Q_ref.size() == 0) else begin
+			$error("Missing output.");
+			$stop;
+		end
+
+		$display("Test completed.");
+		$finish;
+	end
+
+endmodule : mvu_3sx3u_tb