/*
 *       ml_engine.v
 *
 *   Copyright (C) 2025 Private Island Networks Inc.
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 *
 *   function: Machine Learning Engine Framework
 *
 *   see https://privateisland.tech/dev/pi-ml-framework
 *
 */

/*
 * ml_engine
 *
 * Parameters:
 *   NUM_IF       number of source interfaces (width of enable / empty)
 *   DPRAM_DEPTH  depth of each DPRAM; sets the address width via $clog2
 *
 * Behaviour visible in this file:
 *   - A free-running counter (evt_counter) wraps at EVT_CNT_MAX. When the
 *     engine is enabled, evt_start pulses for one clk each time the counter
 *     is zero, and evt_active is set from evt_start until EVT_CNT_STOP.
 *   - FSM_0 walks the source interfaces, asserting one enable bit at a time;
 *     words presented on d_i with we are written into dpram_s0.
 *   - With DIRECT_OUTPUT defined (the default below), port B of dpram_s0 is
 *     read directly by the switch interface (fifo_re / fifo_d_o).
 *   - byte_cnt is the constant BLOCK_OFFSET; cont_tgt_ready is tied high.
 */
module ml_engine #(parameter NUM_IF=8, DPRAM_DEPTH=1024)
(
    input rstn,
    input clk,

    // controller interface
    input cont_clk,
    input cont_sel,
    input cont_we,
    input [15:0] cont_addr,
    input [15:0] cont_d_i,
    output reg [15:0] cont_d_o,
    output cont_tgt_ready,

    // module interface
    output reg evt_start,
    output reg evt_active,
    output reg [NUM_IF-1:0] enable,
    input [NUM_IF-1:0] empty,
    input clk_e,
    input we,
    input [8:0] d_i,

    // switch interface
    output reg fifo_empty_o,
    input fifo_re,
    output [8:0] fifo_d_o,
    output [10:0] byte_cnt

    // action interface

    // memory/coefficient interface
);

`define DIRECT_OUTPUT // this disables processing and second DPRAM

localparam BLOCK_OFFSET = 'h80;       // 128 bytes
localparam BLOCK_OFFSET_SHIFT = 'd7;  // Left shift to multiply by 128

// DPRAM address width; the address nets below use the same expression so the
// memory ports and the nets always agree (10 bits for the default depth).
localparam DPRAM_ADDR_WIDTH = $clog2(DPRAM_DEPTH);

`ifdef SIMULATION
    localparam  EVT_CNT_DELAY_1 = 32'h0000_0010,
                EVT_CNT_DELAY_2 = 32'h0000_0020,
                EVT_CNT_OUT     = 32'h0000_0030,
                EVT_CNT_STOP    = 32'h0000_00C0,
                EVT_CNT_MAX     = 32'h0000_0100;
`else
    localparam  EVT_CNT_DELAY_1 = 32'h0000_0010,
                EVT_CNT_DELAY_2 = 32'h0000_0020,
                EVT_CNT_OUT     = 32'h0000_0030,
                EVT_CNT_STOP    = 32'h0800_0000,
                EVT_CNT_MAX     = 32'h1000_0000; // Sets max event interval
`endif

// Controller I/F Addresses
localparam MLE_ENABLE_ADDR = 'h0;

// FSM States: two loops: event/block and data unit (DU)
localparam  MLE_ST_IDLE      = 4'h0,
            MLE_ST_START     = 4'h1,
            MLE_ST_EVT_START = 4'h2,
            MLE_ST_DU_START  = 4'h3,
            MLE_ST_DU_CONT   = 4'h4,
            MLE_ST_DU_DONE   = 4'h5,
            MLE_ST_EVT_DONE  = 4'h6;

// variables
reg mle_enable, mle_enable_m1, mle_enable_m2;
reg [31:0] evt_counter;
reg evt_delay_1, evt_delay_2, evt_delay_out;
reg [NUM_IF-1:0] empty_m1, empty_m2;

// Set up 1K DPRAM as 8 blocks of 128 words.
wire [$clog2(DPRAM_DEPTH)-1:0] wr_addr0, wr_addr1;
wire [$clog2(DPRAM_DEPTH)-1:0] rd_addr0, rd_addr1;
reg [2:0] wr_block0, wr_block1;
reg [2:0] rd_block0, rd_block1;
reg [6:0] wr_ptr0, wr_ptr1;
reg [6:0] rd_ptr0, rd_ptr1;
wire rd_oe0;
reg [6:0] cnt0;
reg [$clog2(DPRAM_DEPTH)-1:0] pkt_sz;

wire [8:0] d_s0_o, d_i_internal;
reg [NUM_IF-1:0] enable_logic_active; // enable logic active
wire we0, we1;

reg [3:0] mle_0_state, mle_1_state, mle_2_state;

reg d_out_avail;
wire [8:0] fifo_d;
reg fifo_empty;
reg fifo_d_out_flag;

// Debug
reg [8*12:1] mle_0_state_str;

/******************************************************
    Controller Interface
    System's internal controller can write and read
    important parameters
******************************************************/

// Controller Read Data Mux
// Registered on cont_clk; an unmapped address holds the previous read value.
always @(posedge cont_clk, negedge rstn)
    if (!rstn)
        cont_d_o <= 16'hcccc;
    else
        case (cont_addr)
            MLE_ENABLE_ADDR: cont_d_o <= {15'd0, mle_enable};
            default:         cont_d_o <= cont_d_o;
        endcase

// TODO: add logic to prevent controller reading metastable data
assign cont_tgt_ready = 1'b1;

// mle_enable: enable / disable the MLE
always @(posedge cont_clk, negedge rstn)
    if (!rstn)
        mle_enable <= 1'b0;
    else if (cont_we && cont_sel && cont_addr == MLE_ENABLE_ADDR)
        mle_enable <= cont_d_i[0];

// synchronizer for controller vars (cont_clk domain -> clk domain)
always @(posedge clk, negedge rstn)
    if (!rstn) begin
        mle_enable_m1 <= 1'b0;
        mle_enable_m2 <= 1'b0;
    end
    else begin
        mle_enable_m1 <= mle_enable;
        mle_enable_m2 <= mle_enable_m1;
    end

/******************************************************
    Event Logic
******************************************************/

// evt_counter: free-running, wraps to 0 after reaching EVT_CNT_MAX
always @(posedge clk, negedge rstn)
    if (!rstn)
        evt_counter <= EVT_CNT_MAX;
    else if (evt_counter == EVT_CNT_MAX)
        evt_counter <= 'd0;
    else
        evt_counter <= evt_counter + 1'b1;

// evt_start: single-cycle pulse when enabled and the counter is zero
always @(posedge clk, negedge rstn)
    if (!rstn)
        evt_start <= 'd0;
    else if (mle_enable_m2 && evt_counter == 32'd0)
        evt_start <= 1'b1;
    else
        evt_start <= 1'b0;

// evt_active: set by evt_start, cleared at EVT_CNT_STOP (clear has priority)
always @(posedge clk, negedge rstn)
    if (!rstn)
        evt_active <= 'd0;
    else if (evt_counter == EVT_CNT_STOP)
        evt_active <= 1'b0;
    else if (evt_start)
        evt_active <= 'd1;

/******************************************************
    FSM_0, Accept data from RX modules into DPRAM Step 0
******************************************************/

// FSM_0:
always @(posedge clk, negedge rstn)
    if (!rstn)
        mle_0_state <= MLE_ST_IDLE;
    else
        case (mle_0_state)
            MLE_ST_IDLE:
                if (evt_start && !(&empty))
                    mle_0_state <= MLE_ST_EVT_START;
            MLE_ST_EVT_START:
                if (!(&empty))      // Is a DU available?
                    mle_0_state <= MLE_ST_DU_START;
            MLE_ST_DU_START:
                mle_0_state <= MLE_ST_DU_CONT;
            MLE_ST_DU_CONT:
                if (d_i[8])         // Done flag set?
                    mle_0_state <= MLE_ST_DU_DONE;
            MLE_ST_DU_DONE:
                if (&empty)         // Is another DU available?
                    mle_0_state <= MLE_ST_EVT_DONE;
                else
                    mle_0_state <= MLE_ST_DU_START;
            MLE_ST_EVT_DONE:
                if (1'b1)
                    mle_0_state <= MLE_ST_IDLE;
            default:
                mle_0_state <= mle_0_state;
        endcase

// Debug only: state name as ASCII for waveform viewing.
// Combinational block, so blocking assignments are used.
always @(*)
    case (mle_0_state)
        MLE_ST_IDLE:      mle_0_state_str = "IDLE";
        MLE_ST_START:     mle_0_state_str = "START";
        MLE_ST_EVT_START: mle_0_state_str = "EVT_START";
        MLE_ST_DU_START:  mle_0_state_str = "DU_START";
        MLE_ST_DU_CONT:   mle_0_state_str = "DU_CONT";
        MLE_ST_DU_DONE:   mle_0_state_str = "DU_DONE";
        MLE_ST_EVT_DONE:  mle_0_state_str = "EVT_DONE";
        default:          mle_0_state_str = "UNDEFINED";
    endcase

// wr_block0: advance to the next 128-word block on every event start
always @(posedge clk, negedge rstn)
    if (!rstn)
        wr_block0 <= 'd1;
    else if (evt_start)
        wr_block0 <= wr_block0 + 1'b1;

// wr_ptr0: dpram_s0 write address (offset inside the current block);
// cleared whenever no enable bit is asserted
always @(posedge clk, negedge rstn)
    if (!rstn)
        wr_ptr0 <= 'd0;
    else if (!(|enable))
        wr_ptr0 <= 'd0;
    else if (we0)
        wr_ptr0 <= wr_ptr0 + 1'b1;

assign wr_addr0 = (wr_block0 << BLOCK_OFFSET_SHIFT) + wr_ptr0;
assign we0 = we;

// enable_logic_active: assert a bit to indicate which enable logic block is active
// (one-hot, starts at bit 0 and shifts left after each data unit)
always @(posedge clk, negedge rstn)
    if (!rstn)
        enable_logic_active <= 'd0;
    else if (mle_0_state == MLE_ST_IDLE)
        enable_logic_active <= 'd0;
    else if (mle_0_state == MLE_ST_EVT_START)
        enable_logic_active <= 1'b1;
    else if (mle_0_state == MLE_ST_DU_DONE)
        enable_logic_active <= enable_logic_active << 1;

// enable logic: assert each enable until empty
generate
    genvar i;
    for (i = 0; i < NUM_IF; i = i + 1) begin : enable_logic
        always @(posedge clk, negedge rstn)
            if (!rstn)
                enable[i] <= 1'b0;
            else if (enable[i] && empty[i])
                enable[i] <= 1'b0;
            else if (enable_logic_active[i] && !empty[i])
                enable[i] <= 1'b1;
            else
                enable[i] <= 1'b0;

        // two-stage registered copy of empty[i]
        always @(posedge clk, negedge rstn)
            if (!rstn) begin
                empty_m1[i] <= 1'b1;
                empty_m2[i] <= 1'b1;
            end
            else begin
                empty_m1[i] <= empty[i];
                empty_m2[i] <= empty_m1[i];
            end
    end
endgenerate

// cnt0: count number of words written into dpram_s0 per event cycle
// NOTE(review): the increment condition requires every empty bit to be low,
// not just the active interface's - confirm this is intended.
always @(posedge clk, negedge rstn)
    if (!rstn)
        cnt0 <= 'd0;
    else if (!evt_active)
        cnt0 <= 'd0;
    else if (|enable && !(|empty))
        cnt0 <= cnt0 + 1'b1;

// Instantiate x9 DPRAM for Step 0 DPRAM
dpram_inf #(.ADDR_WIDTH(DPRAM_ADDR_WIDTH), .DPRAM_INIT("mle_ram_0.txt")) dpram_s0(
    .rstn(rstn),
    .a_clk(clk),
    .a_clk_e(clk_e),
    .a_we(we0),
    .a_oe(1'b0),
    .a_addr(wr_addr0),
    .a_din(d_i),
    .a_dout(),
    // port B
    .b_clk(clk),
    .b_clk_e(1'b1),
    .b_we(1'b0),
    .b_oe(rd_oe0),
    .b_addr(rd_addr0),
    .b_din(9'h0),
    .b_dout(d_s0_o)
);

`ifdef DIRECT_OUTPUT

    // Direct output: the switch reads dpram_s0 port B; stage 1 nets simply
    // mirror their stage 0 counterparts.
    assign rd_oe0 = fifo_re;
    assign fifo_d = d_s0_o;
    assign fifo_d_o[7:0] = fifo_empty ? 8'h00 : d_s0_o[7:0];
    assign fifo_d_o[8] = fifo_empty_o ? 1'b1 : 1'b0;
    assign rd_addr0 = rd_addr1;
    assign wr_addr1 = wr_addr0;
    assign we1 = we0;

`else

    // NOTE(review): in this branch fifo_d_o and d_i_internal have no driver
    // in this file; the processing path is still a placeholder.

    /******************************************************
        FSM_1: Read data from DPRAM Step 0.
        read and store code. FSM logic/path may depend on code
        Do I need a size field?
    ******************************************************/

    // evt_delay_1:
    always @(posedge clk, negedge rstn)
        if (!rstn)
            evt_delay_1 <= 'd0;
        else if (evt_counter == EVT_CNT_DELAY_1)
            evt_delay_1 <= 1'b1;
        else
            evt_delay_1 <= 1'b0;

    // FSM_1:
    always @(posedge clk, negedge rstn)
        if (!rstn)
            mle_1_state <= MLE_ST_IDLE;
        else if (evt_active)
            case (mle_1_state)
                MLE_ST_IDLE:
                    if (evt_delay_1)
                        mle_1_state <= MLE_ST_DU_START;
                MLE_ST_DU_START:
                    mle_1_state <= MLE_ST_DU_CONT;
                MLE_ST_DU_CONT:
                    if (d_s0_o[8])
                        mle_1_state <= MLE_ST_DU_DONE;
                MLE_ST_DU_DONE:
                    if (d_s0_o[8])
                        mle_1_state <= MLE_ST_IDLE;
                    else
                        mle_1_state <= MLE_ST_DU_START;
                default:
                    mle_1_state <= mle_1_state;
            endcase

    // rd_block0:
    always @(posedge clk, negedge rstn)
        if (!rstn)
            rd_block0 <= 'd0;
        else if (evt_delay_1)
            rd_block0 <= wr_block0;

    // rd_ptr0: dpram_s0 read pointer
    always @(posedge clk, negedge rstn)
        if (!rstn)
            rd_ptr0 <= 'd0;
        else
            rd_ptr0 <= rd_ptr0 + 1'b1;

    // '+' binds tighter than '<<', so the shift must be parenthesised to get
    // block base + offset (same form as wr_addr0 / rd_addr1).
    assign rd_addr0 = (rd_block0 << BLOCK_OFFSET_SHIFT) + rd_ptr0;

    // rd_oe0:
    assign rd_oe0 = 1'b1;

    // wr_block1:
    always @(posedge clk, negedge rstn)
        if (!rstn)
            wr_block1 <= 'd1;
        else if (evt_delay_2)
            wr_block1 <= wr_block0;

    // wr_ptr1: dpram_s1 write address (increment condition is a placeholder)
    always @(posedge clk, negedge rstn)
        if (!rstn)
            wr_ptr1 <= 'd0;
        else if (1'b0)
            wr_ptr1 <= wr_ptr1 + 1'b1;

    assign wr_addr1 = (wr_block1 << BLOCK_OFFSET_SHIFT) + wr_ptr1;

    // we1: write enable for dpram_s1
    // we1 is a net (it is continuously assigned in the DIRECT_OUTPUT branch),
    // so the registered value lives in we1_r and is forwarded onto the net.
    reg we1_r;

    always @(posedge clk, negedge rstn)
        if (!rstn)
            we1_r <= 1'b0;
        else if (1'b0)
            we1_r <= 1'b1;
        else
            we1_r <= 1'b0;

    assign we1 = we1_r;

    // Instantiate 1k x 9 DPRAM
    // dpram_s1: port B interfaces with Switch
    dpram_inf #(.ADDR_WIDTH(DPRAM_ADDR_WIDTH), .DPRAM_INIT("mle_ram_1.txt")) dpram_s1(
        .rstn(rstn),
        .a_clk(clk),
        .a_clk_e(1'b1),
        .a_we(we1),
        .a_oe(1'b0),
        .a_addr(wr_addr1),
        .a_din(d_i_internal),
        .a_dout(),
        // port B
        .b_clk(clk),
        .b_clk_e(1'b1),
        .b_we(1'b0),
        .b_oe(fifo_re),
        .b_addr(rd_addr1),
        .b_din(9'h0),
        .b_dout(fifo_d)
    );

    /******************************************************
        FSM_2, Transfer data from Processing to DPRAM Out
    ******************************************************/

    // evt_delay_2:
    always @(posedge clk, negedge rstn)
        if (!rstn)
            evt_delay_2 <= 'd0;
        else if (evt_counter == EVT_CNT_DELAY_2)
            evt_delay_2 <= 1'b1;
        else
            evt_delay_2 <= 1'b0;

    // FSM_2:
    always @(posedge clk, negedge rstn)
        if (!rstn)
            mle_2_state <= MLE_ST_IDLE;
        else if (evt_active)
            case (mle_2_state)
                MLE_ST_EVT_START:
                    if (1'b1)
                        mle_2_state <= MLE_ST_DU_START;
                MLE_ST_DU_START:
                    if (1'b1)
                        mle_2_state <= MLE_ST_DU_CONT;
                MLE_ST_DU_CONT:
                    if (1'b1)
                        mle_2_state <= MLE_ST_DU_DONE;
                MLE_ST_DU_DONE:
                    if (1'b1)
                        mle_2_state <= MLE_ST_EVT_DONE;
                MLE_ST_EVT_DONE:
                    if (1'b1)
                        mle_2_state <= MLE_ST_IDLE;
                default:
                    mle_2_state <= mle_2_state;
            endcase

    /******************************************************
        Output, Switch reads from dpram_s1
    ******************************************************/

`endif

// d_out_avail: data for output is available
// set by a write (we1), cleared when the output phase is kicked off
always @(posedge clk, negedge rstn)
    if (!rstn)
        d_out_avail <= 1'b0;
    else if (evt_delay_out)
        d_out_avail <= 1'b0;
    else if (we1)
        d_out_avail <= 1'b1;

// evt_delay_out: pulse at EVT_CNT_OUT, only if data was written this event
always @(posedge clk, negedge rstn)
    if (!rstn)
        evt_delay_out <= 'd0;
    else if (evt_counter == EVT_CNT_OUT && d_out_avail)
        evt_delay_out <= 1'b1;
    else
        evt_delay_out <= 1'b0;

// fifo_d_out_flag: bit 8 from dpram_s1
always @(posedge clk, negedge rstn)
    if (!rstn)
        fifo_d_out_flag <= 1'b0;
    else if (fifo_re)
        fifo_d_out_flag <= fifo_d[8];
    else
        fifo_d_out_flag <= 1'b0;

assign byte_cnt = BLOCK_OFFSET;

// fifo_empty: assert when the last byte from the DPRAM is read
always @(posedge clk, negedge rstn)
    if (!rstn)
        fifo_empty <= 1'b1;
    else if (fifo_d_out_flag)
        fifo_empty <= 1'b1;
    else if (evt_delay_out)
        fifo_empty <= 1'b0;

// fifo_empty_o: module output to indicate to reader to stop
// asserted once the read address reaches the last word of the current block
always @(posedge clk, negedge rstn)
    if (!rstn)
        fifo_empty_o <= 1'b1;
    else if (rd_addr1 == (rd_block1 << BLOCK_OFFSET_SHIFT) + (byte_cnt-1))
        fifo_empty_o <= 1'b1;
    else if (evt_delay_out)
        fifo_empty_o <= 1'b0;

// rd_block1: latch the block to read out when the output phase starts
always @(posedge clk, negedge rstn)
    if (!rstn)
        rd_block1 <= 'd0;
    else if (evt_delay_out)
        rd_block1 <= wr_block0;

// rd_ptr1: dpram_s1 read address (offset inside the block being read out)
always @(posedge clk, negedge rstn)
    if (!rstn)
        rd_ptr1 <= 'd0;
    else if (evt_delay_out)
        rd_ptr1 <= 'd0;
    else if (fifo_re)
        rd_ptr1 <= rd_ptr1 + 1'b1;

assign rd_addr1 = (rd_block1 << BLOCK_OFFSET_SHIFT) + rd_ptr1;

// pkt_sz: store number of bytes in dpram_s1 to use as packet size
// (placeholder: currently only reset, never updated)
always @(posedge clk, negedge rstn)
    if (!rstn)
        pkt_sz <= 'd0;
    else
        pkt_sz <= pkt_sz;

endmodule