diff options
Diffstat (limited to 'src/ml_engine.v')
| -rw-r--r-- | src/ml_engine.v | 563 |
1 files changed, 563 insertions, 0 deletions
/*
 *       ml_engine.v
 *
 *   Copyright (C) 2025 Private Island Networks Inc.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 *   function: Machine Learning Engine Framework
 *
 *   see https://privateisland.tech/dev/pi-ml-framework
 *
 */

/*
 * ml_engine
 *
 * Parameters:
 *   NUM_IF       number of source interfaces (width of enable / empty)
 *   DPRAM_DEPTH  used to size the internal DPRAM address buses
 *                ($clog2(DPRAM_DEPTH) bits)
 *
 * Structure (as implemented below):
 *   - A controller register (address 0, bit 0) written on cont_clk enables
 *     the engine; it is re-registered twice on clk before use.
 *   - A free-running 32-bit counter on clk defines the event interval;
 *     evt_start pulses for one clk when the counter is 0 and the engine is
 *     enabled.
 *   - FSM_0 sequences the per-interface enable bits while d_i / we are
 *     written into dpram_s0.
 *   - With DIRECT_OUTPUT defined (the default in this file) port B of
 *     dpram_s0 is read directly through the fifo_* ports; otherwise a
 *     second DPRAM (dpram_s1) and two further, still incomplete, FSMs are
 *     compiled in.
 */
module ml_engine #(parameter NUM_IF=8, DPRAM_DEPTH=1024)
(
	input rstn,
	input clk,

	// controller interface
	input cont_clk,
	input cont_sel,
	input cont_we,
	input [15:0] cont_addr,
	input [15:0] cont_d_i,
	output reg [15:0] cont_d_o,
	output cont_tgt_ready,

	// module interface
	output reg evt_start,
	output reg evt_active,
	output reg [NUM_IF-1:0] enable,
	input [NUM_IF-1:0] empty,
	input clk_e,
	input we,
	input [8:0] d_i,

	// switch interface
	output reg fifo_empty_o,
	input fifo_re,
	output [8:0] fifo_d_o,
	output [10:0] byte_cnt

	// action interface

	// memory/coefficient interface

);

`define DIRECT_OUTPUT // this disables processing and second DPRAM

	localparam BLOCK_OFFSET = 'h80; // 128 bytes
	localparam BLOCK_OFFSET_SHIFT = 'd7; // Left shift to multiply by 128

`ifdef SIMULATION
	// Shortened event interval so a simulation run covers several events
	localparam EVT_CNT_DELAY_1 = 32'h0000_0010,
		EVT_CNT_DELAY_2 = 32'h0000_0020,
		EVT_CNT_OUT = 32'h0000_0030,
		EVT_CNT_STOP = 32'h0000_00C0,
		EVT_CNT_MAX = 32'h0000_0100;
`else
	localparam EVT_CNT_DELAY_1 = 32'h0000_0010,
		EVT_CNT_DELAY_2 = 32'h0000_0020,
		EVT_CNT_OUT = 32'h0000_0030,
		EVT_CNT_STOP = 32'h0800_0000,
		EVT_CNT_MAX = 32'h1000_0000; // Sets max event interval
`endif


	// Controller I/F Addresses
	localparam MLE_ENABLE_ADDR = 'h0;

	// FSM States: two loops: event/block and data unit (DU)
	localparam MLE_ST_IDLE=4'h0, MLE_ST_START=4'h1, MLE_ST_EVT_START = 4'h2,
		MLE_ST_DU_START = 4'h3, MLE_ST_DU_CONT = 4'h4, MLE_ST_DU_DONE = 4'h5,
		MLE_ST_EVT_DONE = 4'h6;

	// variables
	reg mle_enable, mle_enable_m1, mle_enable_m2;
	reg[31:0] evt_counter;
	reg evt_delay_1, evt_delay_2, evt_delay_out;

	reg [NUM_IF-1:0] empty_m1, empty_m2;

	// Set up 1K DPRAM as 8 blocks of 128 words.
	// Address = {3-bit block, 7-bit pointer within the block}
	wire [$clog2(DPRAM_DEPTH)-1:0] wr_addr0, wr_addr1;
	wire [$clog2(DPRAM_DEPTH)-1:0] rd_addr0, rd_addr1;
	reg [2:0] wr_block0, wr_block1;
	reg [2:0] rd_block0, rd_block1;
	reg [6:0] wr_ptr0, wr_ptr1;
	reg [6:0] rd_ptr0, rd_ptr1;
	wire rd_oe0;
	reg [6:0] cnt0;
	reg [$clog2(DPRAM_DEPTH)-1:0] pkt_sz;

	wire [8:0] d_s0_o, d_i_internal;

	reg [NUM_IF-1:0] enable_logic_active; // enable logic active

	wire we0;
`ifdef DIRECT_OUTPUT
	// we1 is a plain alias of we0 in direct-output mode
	wire we1;
`else
	// we1 is assigned procedurally in the processing path, so it must be a reg
	reg we1;
`endif

	reg [3:0] mle_0_state, mle_1_state, mle_2_state;
	reg d_out_avail;
	wire [8:0] fifo_d;
	reg fifo_empty;
	reg fifo_d_out_flag;

	// Debug
	reg [8*12:1] mle_0_state_str;


	/******************************************************

		Controller Interface

		System's internal controller can write and read important parameters

	******************************************************/

	// Controller Read Data Mux
	// Only MLE_ENABLE_ADDR is decoded; any other address holds the last value.
	always @(posedge cont_clk, negedge rstn)
		if (!rstn)
			cont_d_o <= 16'hcccc;
		else
			case (cont_addr)
				MLE_ENABLE_ADDR: cont_d_o <= mle_enable;
				default: cont_d_o <= cont_d_o;
			endcase

	// TODO: add logic to prevent controller reading metastable data
	assign cont_tgt_ready = 1'b1;


	// mle_enable: enable / disable the MLE (bit 0 of a write to MLE_ENABLE_ADDR)
	always @(posedge cont_clk, negedge rstn)
		if (!rstn)
			mle_enable <= 1'b0;
		else if (cont_we && cont_sel && cont_addr == MLE_ENABLE_ADDR)
			mle_enable <= cont_d_i[0];

	// synchronizer for controller vars: cont_clk -> clk, two register stages
	always @(posedge clk, negedge rstn)
		if( !rstn ) begin
			mle_enable_m1 <= 1'b0;
			mle_enable_m2 <= 1'b0;
		end
		else begin
			mle_enable_m1 <= mle_enable;
			mle_enable_m2 <= mle_enable_m1;
		end

	/******************************************************

		Event Logic

	******************************************************/

	// evt_counter: free-running, wraps to 0 after reaching EVT_CNT_MAX
	always @(posedge clk, negedge rstn)
		if( !rstn )
			evt_counter <= EVT_CNT_MAX;
		else if (evt_counter == EVT_CNT_MAX)
			evt_counter <= 'd0;
		else
			evt_counter <= evt_counter + 1'b1;

	// evt_start: one-clk pulse when the counter is 0 and the engine is enabled
	always @(posedge clk, negedge rstn)
		if( !rstn )
			evt_start <= 'd0;
		else if (mle_enable_m2 && evt_counter == 32'd0)
			evt_start <= 1'b1;
		else
			evt_start <= 1'b0;

	// evt_active: set by evt_start, cleared when the counter hits EVT_CNT_STOP
	always @(posedge clk, negedge rstn)
		if( !rstn )
			evt_active <= 'd0;
		else if (evt_counter == EVT_CNT_STOP)
			evt_active <= 1'b0;
		else if (evt_start)
			evt_active <= 'd1;

	/******************************************************

		FSM_0, Accept data from RX modules into DPRAM Step 0

	******************************************************/

	// FSM_0:
	always @(posedge clk, negedge rstn)
		if (!rstn)
			mle_0_state <= MLE_ST_IDLE;
		else
			case (mle_0_state)
				MLE_ST_IDLE: if (evt_start && !(&empty))
					mle_0_state <= MLE_ST_EVT_START;
				MLE_ST_EVT_START: if (!(&empty)) // Is a DU available?
					mle_0_state <= MLE_ST_DU_START;
				MLE_ST_DU_START:
					mle_0_state <= MLE_ST_DU_CONT;
				MLE_ST_DU_CONT: if (d_i[8]) // Done flag set?
					mle_0_state <= MLE_ST_DU_DONE;
				MLE_ST_DU_DONE: if (&empty) // All interfaces empty: event is done
					mle_0_state <= MLE_ST_EVT_DONE;
				else
					mle_0_state <= MLE_ST_DU_START;
				MLE_ST_EVT_DONE:
					mle_0_state <= MLE_ST_IDLE;
				default: mle_0_state <= mle_0_state;
			endcase


	// Debug: state name as a string for waveform viewing.
	// Combinational block, so blocking assignments are used.
	always @(*)
		case(mle_0_state)
			MLE_ST_IDLE: mle_0_state_str = "IDLE";
			MLE_ST_START: mle_0_state_str = "START";
			MLE_ST_EVT_START: mle_0_state_str = "EVT_START";
			MLE_ST_DU_START: mle_0_state_str = "DU_START";
			MLE_ST_DU_CONT: mle_0_state_str = "DU_CONT";
			MLE_ST_DU_DONE: mle_0_state_str = "DU_DONE";
			MLE_ST_EVT_DONE: mle_0_state_str = "EVT_DONE";
			default: mle_0_state_str = "UNDEFINED";
		endcase


	// wr_block0: advance to the next 128-word block on every event start
	always @(posedge clk, negedge rstn)
		if( !rstn )
			wr_block0 <= 'd1;
		else if (evt_start)
			wr_block0 <= wr_block0 + 1'b1;

	// wr_ptr0: dpram_s0 write address within the block;
	// held at 0 while no enable bit is set
	always @(posedge clk, negedge rstn)
		if( !rstn )
			wr_ptr0 <= 'd0;
		else if (!(|enable))
			wr_ptr0 <= 'd0;
		else if (we0)
			wr_ptr0 <= wr_ptr0 + 1'b1;

	assign wr_addr0 = (wr_block0 << BLOCK_OFFSET_SHIFT) + wr_ptr0;
	assign we0 = we;

	// enable_logic_active: assert a bit to indicate which enable logic block is active.
	// Bit 0 is selected in EVT_START; the bit shifts left on each DU_DONE.
	always @(posedge clk, negedge rstn)
		if( !rstn )
			enable_logic_active <= 'd0;
		else if (mle_0_state == MLE_ST_IDLE)
			enable_logic_active <= 'd0;
		else if (mle_0_state == MLE_ST_EVT_START)
			enable_logic_active <= 1'b1;
		else if (mle_0_state == MLE_ST_DU_DONE)
			enable_logic_active <= enable_logic_active << 1;

	// enable logic: assert each enable until empty
	generate
		genvar i;
		for (i=0; i< NUM_IF; i=i+1) begin: enable_logic
			always @(posedge clk, negedge rstn)
				if(!rstn)
					enable[i] <= 1'b0;
				else if (enable[i] && empty[i])
					enable[i] <= 1'b0;
				else if (enable_logic_active[i] && !empty[i])
					enable[i] <= 1'b1;
				else
					enable[i] <= 1'b0;

			// Two-stage registered copy of empty[i].
			// NOTE(review): empty_m1 / empty_m2 are not read anywhere in this
			// module; the logic above samples empty directly.
			always @(posedge clk, negedge rstn)
				if(!rstn) begin
					empty_m1[i] <= 1'b1;
					empty_m2[i] <= 1'b1;
				end
				else begin
					empty_m1[i] <= empty[i];
					empty_m2[i] <= empty_m1[i];
				end
		end
	endgenerate

	// cnt0: intended to count words written into dpram_s0 per event cycle.
	// As coded it increments on every clk where some enable bit is set and
	// no empty bit is set, and is cleared while evt_active is low.
	// NOTE(review): cnt0 is not read anywhere in this module.
	always @(posedge clk, negedge rstn)
		if( !rstn )
			cnt0 <= 'd0;
		else if (!evt_active)
			cnt0 <= 'd0;
		else if (|enable && !(|empty))
			cnt0 <= cnt0 + 1'b1;


	// Instantiate x9 DPRAM for Step 0 DPRAM
	// NOTE(review): ADDR_WIDTH is fixed at 10 while the address wires are
	// $clog2(DPRAM_DEPTH) wide; they only agree for DPRAM_DEPTH = 1024.
	dpram_inf #(.ADDR_WIDTH(10),.DPRAM_INIT("mle_ram_0.txt")) dpram_s0(
		.rstn(rstn),
		.a_clk(clk),
		.a_clk_e(clk_e),
		.a_we(we0),
		.a_oe(1'b0),
		.a_addr(wr_addr0),
		.a_din(d_i),
		.a_dout(),
		// port B
		.b_clk(clk),
		.b_clk_e(1'b1),
		.b_we(1'b0),
		.b_oe(rd_oe0),
		.b_addr(rd_addr0),
		.b_din(9'h0),
		.b_dout(d_s0_o)
	);

`ifdef DIRECT_OUTPUT

	// Direct output: the switch reads dpram_s0 port B; the stage-1 signals
	// are aliased onto their stage-0 counterparts.
	assign rd_oe0 = fifo_re;
	assign fifo_d = d_s0_o;
	assign fifo_d_o[7:0] = fifo_empty ? 8'h00 : d_s0_o[7:0];
	assign fifo_d_o[8] = fifo_empty_o;
	assign rd_addr0 = rd_addr1;
	assign wr_addr1 = wr_addr0;
	assign we1 = we0;

`else


	/******************************************************

		FSM_1: Read data from DPRAM Step 0.
		read and store code. FSM logic/path may depend on code
		Do I need a size field?

	******************************************************/

	// evt_delay_1: one-clk pulse when the counter equals EVT_CNT_DELAY_1
	always @(posedge clk, negedge rstn)
		if( !rstn )
			evt_delay_1 <= 'd0;
		else if (evt_counter == EVT_CNT_DELAY_1)
			evt_delay_1 <= 1'b1;
		else
			evt_delay_1 <= 1'b0;

	// FSM_1:
	always @(posedge clk, negedge rstn)
		if (!rstn)
			mle_1_state <= MLE_ST_IDLE;
		else if (evt_active)
			case (mle_1_state)
				MLE_ST_IDLE: if (evt_delay_1)
					mle_1_state <= MLE_ST_DU_START;
				MLE_ST_DU_START:
					mle_1_state <= MLE_ST_DU_CONT;
				MLE_ST_DU_CONT: if (d_s0_o[8])
					mle_1_state <= MLE_ST_DU_DONE;
				MLE_ST_DU_DONE: if (d_s0_o[8])
					mle_1_state <= MLE_ST_IDLE;
				else
					mle_1_state <= MLE_ST_DU_START;
				default: mle_1_state <= mle_1_state;
			endcase

	// rd_block0: latch the current write block when evt_delay_1 pulses
	always @(posedge clk, negedge rstn)
		if( !rstn )
			rd_block0 <= 'd0;
		else if (evt_delay_1)
			rd_block0 <= wr_block0;

	// rd_ptr0: dpram_s0 read pointer (free-running)
	always @(posedge clk, negedge rstn)
		if( !rstn )
			rd_ptr0 <= 'd0;
		else
			rd_ptr0 <= rd_ptr0 + 1'b1;

	// '+' binds tighter than '<<' in Verilog, so the shift must be parenthesised
	assign rd_addr0 = (rd_block0 << BLOCK_OFFSET_SHIFT) + rd_ptr0;

	// rd_oe0:
	assign rd_oe0 = 1'b1;

	// wr_block1: latch the current write block when evt_delay_2 pulses
	always @(posedge clk, negedge rstn)
		if( !rstn )
			wr_block1 <= 'd1;
		else if (evt_delay_2)
			wr_block1 <= wr_block0;

	// wr_ptr1: dpram_s1 write address (increment condition is a 1'b0 placeholder)
	always @(posedge clk, negedge rstn)
		if( !rstn )
			wr_ptr1 <= 'd0;
		else if (1'b0)
			wr_ptr1 <= wr_ptr1 + 1'b1;

	// '+' binds tighter than '<<' in Verilog, so the shift must be parenthesised
	assign wr_addr1 = (wr_block1 << BLOCK_OFFSET_SHIFT) + wr_ptr1;



	// we1: write enable for dpram_s1 (set condition is a 1'b0 placeholder)
	always @(posedge clk, negedge rstn)
		if( !rstn )
			we1 <= 1'b0;
		else if (1'b0)
			we1 <= 1'b1;
		else
			we1 <= 1'b0;



	// Instantiate 1k x 9 DPRAM
	// dpram_s1: port B interfaces with Switch
	// NOTE(review): d_i_internal has no driver in this module, and fifo_d_o
	// is not assigned in this (non-DIRECT_OUTPUT) path.
	dpram_inf #(.ADDR_WIDTH(10), .DPRAM_INIT("mle_ram_1.txt")) dpram_s1(
		.rstn(rstn),
		.a_clk(clk),
		.a_clk_e(1'b1),
		.a_we(we1),
		.a_oe(1'b0),
		.a_addr(wr_addr1),
		.a_din(d_i_internal),
		.a_dout(),
		// port B
		.b_clk(clk),
		.b_clk_e(1'b1),
		.b_we(1'b0),
		.b_oe(fifo_re),
		.b_addr(rd_addr1),
		.b_din(9'h0),
		.b_dout(fifo_d)
	);

	/******************************************************

		FSM_2, Transfer data from Processing to DPRAM Out

	******************************************************/

	// evt_delay_2: one-clk pulse when the counter equals EVT_CNT_DELAY_2
	always @(posedge clk, negedge rstn)
		if( !rstn )
			evt_delay_2 <= 'd0;
		else if (evt_counter == EVT_CNT_DELAY_2)
			evt_delay_2 <= 1'b1;
		else
			evt_delay_2 <= 1'b0;


	// FSM_2: placeholder sequencing, every transition is unconditional
	always @(posedge clk, negedge rstn)
		if (!rstn)
			mle_2_state <= MLE_ST_IDLE;
		else if (evt_active)
			case (mle_2_state)
				MLE_ST_EVT_START: if (1'b1)
					mle_2_state <= MLE_ST_DU_START;
				MLE_ST_DU_START: if (1'b1)
					mle_2_state <= MLE_ST_DU_CONT;
				MLE_ST_DU_CONT: if (1'b1)
					mle_2_state <= MLE_ST_DU_DONE;
				MLE_ST_DU_DONE: if (1'b1)
					mle_2_state <= MLE_ST_EVT_DONE;
				MLE_ST_EVT_DONE: if (1'b1)
					mle_2_state <= MLE_ST_IDLE;
				default: mle_2_state <= mle_2_state;
			endcase


	/******************************************************

		Output, Switch reads from dpram_s1

	******************************************************/


`endif

	// d_out_avail: data for output is available.
	// Set by a write (we1), cleared when evt_delay_out pulses.
	always @(posedge clk, negedge rstn)
		if( !rstn )
			d_out_avail <= 1'b0;
		else if (evt_delay_out)
			d_out_avail <= 1'b0;
		else if (we1)
			d_out_avail <= 1'b1;

	// evt_delay_out: one-clk pulse at EVT_CNT_OUT when output data is available
	always @(posedge clk, negedge rstn)
		if( !rstn )
			evt_delay_out <= 'd0;
		else if (evt_counter == EVT_CNT_OUT && d_out_avail)
			evt_delay_out <= 1'b1;
		else
			evt_delay_out <= 1'b0;

	// fifo_d_out_flag: bit 8 of fifo_d, registered while fifo_re is asserted
	always @(posedge clk, negedge rstn)
		if (!rstn)
			fifo_d_out_flag <= 1'b0;
		else if (fifo_re)
			fifo_d_out_flag <= fifo_d[8];
		else
			fifo_d_out_flag <= 1'b0;

	// byte_cnt: fixed at one full block (128)
	assign byte_cnt = BLOCK_OFFSET;


	// fifo_empty: assert when the last byte from the DPRAM is read
	always @(posedge clk, negedge rstn)
		if( !rstn )
			fifo_empty <= 1'b1;
		else if (fifo_d_out_flag)
			fifo_empty <= 1'b1;
		else if (evt_delay_out)
			fifo_empty <= 1'b0;


	// fifo_empty_o: module output to indicate to reader to stop.
	// Asserted when the read address reaches the last word of the block.
	always @(posedge clk, negedge rstn)
		if( !rstn )
			fifo_empty_o <= 1'b1;
		else if (rd_addr1 == (rd_block1 << BLOCK_OFFSET_SHIFT) + (byte_cnt-1))
			fifo_empty_o <= 1'b1;
		else if (evt_delay_out)
			fifo_empty_o <= 1'b0;


	// rd_block1: latch the current write block when evt_delay_out pulses
	always @(posedge clk, negedge rstn)
		if( !rstn )
			rd_block1 <= 'd0;
		else if (evt_delay_out)
			rd_block1 <= wr_block0;

	// rd_ptr1: output read pointer; restarts at 0 on evt_delay_out and
	// advances on each fifo_re
	always @(posedge clk, negedge rstn)
		if( !rstn )
			rd_ptr1 <= 'd0;
		else if (evt_delay_out)
			rd_ptr1 <= 'd0;
		else if (fifo_re)
			rd_ptr1 <= rd_ptr1 + 1'b1;

	assign rd_addr1 = (rd_block1 << BLOCK_OFFSET_SHIFT) + rd_ptr1;

	// pkt_sz: store number of bytes in dpram_s1 to use as packet size
	// (placeholder: currently only reset to 0 and then held)
	always @(posedge clk, negedge rstn)
		if( !rstn )
			pkt_sz <= 'd0;
		else
			pkt_sz <= pkt_sz;



endmodule



