summaryrefslogtreecommitdiffhomepage
path: root/src/ml_engine.v
diff options
context:
space:
mode:
Diffstat (limited to 'src/ml_engine.v')
-rw-r--r--src/ml_engine.v563
1 files changed, 563 insertions, 0 deletions
diff --git a/src/ml_engine.v b/src/ml_engine.v
new file mode 100644
index 0000000..ec50d87
--- /dev/null
+++ b/src/ml_engine.v
@@ -0,0 +1,563 @@
+/*
+ * ml_engine.v
+ *
+ * Copyright (C) 2025 Private Island Networks Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * function: Machine Learning Engine Framework
+ *
+ * see https://privateisland.tech/dev/pi-ml-framework
+ *
+ */
+
+// ml_engine module header.
+//   NUM_IF      : number of upstream interfaces; sets the width of 'enable'
+//                 and 'empty'.
+//   DPRAM_DEPTH : sets the width of the internal DPRAM address wires via
+//                 $clog2(DPRAM_DEPTH).
+module ml_engine #(parameter NUM_IF=8, DPRAM_DEPTH=1024)
+(
+ input rstn, // active-low asynchronous reset used by every register in this module
+ input clk, // clock for the event counter, FSMs and both DPRAM ports
+
+ // controller interface
+ input cont_clk, // clock for the controller-side registers (cont_d_o, mle_enable)
+ input cont_sel, // must be high, together with cont_we, for a register write
+ input cont_we, // write strobe
+ input [15:0] cont_addr, // register address; only MLE_ENABLE_ADDR is decoded
+ input [15:0] cont_d_i, // write data; bit 0 is loaded into mle_enable
+ output reg [15:0] cont_d_o, // read data; resets to 16'hcccc
+ output cont_tgt_ready, // tied high
+
+ // module interface
+ output reg evt_start, // one-clock pulse when enabled and evt_counter is zero
+ output reg evt_active, // set by evt_start, cleared when evt_counter == EVT_CNT_STOP
+ output reg [NUM_IF-1:0] enable, // per-interface enable, asserted while that interface is selected and not empty
+ input [NUM_IF-1:0] empty, // per-interface empty flags
+ input clk_e, // forwarded to dpram_s0 port A clock enable
+ input we, // write enable for dpram_s0 port A; also advances wr_ptr0
+ input [8:0] d_i, // write data for dpram_s0; bit 8 is tested by FSM_0 as the done flag
+
+ // switch interface
+ output reg fifo_empty_o, // tells the reader to stop; high after reset
+ input fifo_re, // read strobe; advances rd_ptr1
+ output [8:0] fifo_d_o, // read data towards the switch
+ output [10:0] byte_cnt // constant BLOCK_OFFSET (128)
+
+ // action interface
+
+ // memory/coefficient interface
+
+);
+
+`define DIRECT_OUTPUT // this disables processing and second DPRAM
+
+ // Each DPRAM block is BLOCK_OFFSET entries long; a block index is turned
+ // into a base address by shifting it left by BLOCK_OFFSET_SHIT bits.
+ // NOTE(review): "BLOCK_OFFSET_SHIT" looks like a typo for
+ // "BLOCK_OFFSET_SHIFT"; renaming it means touching every use in this file.
+ localparam BLOCK_OFFSET = 'h80; // 128 bytes
+ localparam BLOCK_OFFSET_SHIT = 'd7; // Left shift to multiply by 128
+
+ // Event counter thresholds (compared against the 32-bit evt_counter).
+ // The SIMULATION variant only shortens EVT_CNT_STOP and EVT_CNT_MAX.
+`ifdef SIMULATION
+ localparam EVT_CNT_DELAY_1 = 32'h0000_0010,
+ EVT_CNT_DELAY_2 = 32'h0000_0020,
+ EVT_CNT_OUT = 32'h0000_0030,
+ EVT_CNT_STOP = 32'h0000_00C0,
+ EVT_CNT_MAX = 32'h0000_0100;
+`else
+ localparam EVT_CNT_DELAY_1 = 32'h0000_0010,
+ EVT_CNT_DELAY_2 = 32'h0000_0020,
+ EVT_CNT_OUT = 32'h0000_0030,
+ EVT_CNT_STOP = 32'h0800_0000,
+ EVT_CNT_MAX = 32'h1000_0000; // Sets max event interval
+`endif
+
+
+ // Controller I/F Addresses
+ localparam MLE_ENABLE_ADDR = 'h0;
+
+ // FSM States: two loops: event/block and data unit (DU)
+ // MLE_ST_START is only referenced by the debug string decoder; none of the
+ // FSMs in this file ever transition into it.
+ localparam MLE_ST_IDLE=4'h0, MLE_ST_START=4'h1, MLE_ST_EVT_START = 4'h2,
+ MLE_ST_DU_START = 4'h3, MLE_ST_DU_CONT = 4'h4, MLE_ST_DU_DONE = 4'h5,
+ MLE_ST_EVT_DONE = 4'h6;
+
+ // variables
+ // mle_enable lives in the cont_clk domain; _m1/_m2 are its two-stage
+ // synchronizer in the clk domain.
+ reg mle_enable, mle_enable_m1, mle_enable_m2;
+ reg[31:0] evt_counter; // free-running event interval counter
+ reg evt_delay_1, evt_delay_2, evt_delay_out; // pulses derived from evt_counter thresholds
+
+ // Registered copies of 'empty'. They are written in the enable_logic
+ // generate loop but not read anywhere in this module.
+ reg [NUM_IF-1:0] empty_m1, empty_m2;
+
+ // Set up 1K DPRAM as 8 blocks of 128 words.
+ // An address is formed from a 3-bit block index and a 7-bit pointer.
+ wire [$clog2(DPRAM_DEPTH)-1:0] wr_addr0, wr_addr1;
+ wire [$clog2(DPRAM_DEPTH)-1:0] rd_addr0, rd_addr1;
+ reg [2:0] wr_block0, wr_block1;
+ reg [2:0] rd_block0, rd_block1;
+ reg [6:0] wr_ptr0, wr_ptr1;
+ reg [6:0] rd_ptr0, rd_ptr1;
+ wire rd_oe0; // output enable for dpram_s0 port B
+ reg [6:0] cnt0;
+ reg [$clog2(DPRAM_DEPTH)-1:0] pkt_sz;
+
+ // d_s0_o is dpram_s0 port B read data. d_i_internal feeds dpram_s1 port A
+ // but has no driver in this file.
+ wire [8:0] d_s0_o, d_i_internal;
+
+ reg [NUM_IF-1:0] enable_logic_active; // enable logic active
+
+ wire we0, we1; // write enables for dpram_s0 / dpram_s1
+
+ reg [3:0] mle_0_state, mle_1_state, mle_2_state;
+ reg d_out_avail;
+ wire [8:0] fifo_d;
+ reg fifo_empty;
+ reg fifo_d_out_flag;
+
+ // Debug
+ // Holds up to 12 ASCII characters naming the current FSM_0 state.
+ reg [8*12:1] mle_0_state_str;
+
+
+ /******************************************************
+
+ Controller Interface
+
+ System's internal controller can write and read important parameters
+
+ ******************************************************/
+
+    // Controller read-back register (cont_clk domain).
+    // Resets to 16'hcccc. When cont_addr selects MLE_ENABLE_ADDR the register
+    // captures mle_enable in bit 0 with the upper bits zero; for any other
+    // address it keeps the value it already holds.
+    always @(posedge cont_clk or negedge rstn) begin
+        if (!rstn)
+            cont_d_o <= 16'hcccc;
+        else if (cont_addr == MLE_ENABLE_ADDR)
+            cont_d_o <= {15'd0, mle_enable};
+    end
+
+    // TODO: add logic to prevent controller reading metastable data
+    // The target never inserts wait states: ready is permanently asserted.
+    assign cont_tgt_ready = 1'b1;
+
+
+    // mle_enable: on/off control bit for the MLE, held in the cont_clk domain.
+    // Cleared by reset; loaded from cont_d_i[0] when the controller performs a
+    // selected write (cont_sel and cont_we both high) to MLE_ENABLE_ADDR.
+    always @(posedge cont_clk or negedge rstn) begin
+        if (!rstn) begin
+            mle_enable <= 1'b0;
+        end
+        else if ((cont_addr == MLE_ENABLE_ADDR) && cont_sel && cont_we) begin
+            mle_enable <= cont_d_i[0];
+        end
+    end
+
+    // Two-stage synchronizer that moves mle_enable from the cont_clk domain
+    // into the clk domain. mle_enable_m1 is the first capture stage and
+    // mle_enable_m2 is the copy the event logic reads.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn)
+            {mle_enable_m2, mle_enable_m1} <= 2'b00;
+        else
+            {mle_enable_m2, mle_enable_m1} <= {mle_enable_m1, mle_enable};
+    end
+
+ /******************************************************
+
+ Event Logic
+
+ ******************************************************/
+
+    // evt_counter: free-running event interval counter.
+    // Reset loads EVT_CNT_MAX so the first clock after reset wraps it to zero;
+    // from then on it counts 0 .. EVT_CNT_MAX and wraps again.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            evt_counter <= EVT_CNT_MAX;
+        end
+        else begin
+            evt_counter <= evt_counter + 1'b1;
+            if (evt_counter == EVT_CNT_MAX)
+                evt_counter <= 'd0;           // wrap overrides the increment
+        end
+    end
+
+    // evt_start: registered one-clock pulse, raised for the clock following
+    // evt_counter == 0 provided the synchronized enable is set.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            evt_start <= 'd0;
+        end
+        else begin
+            evt_start <= 1'b0;
+            if ((evt_counter == 32'd0) && mle_enable_m2)
+                evt_start <= 1'b1;
+        end
+    end
+
+    // evt_active: level that is raised by evt_start and dropped when
+    // evt_counter reaches EVT_CNT_STOP. The stop condition wins if both
+    // occur on the same clock.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            evt_active <= 'd0;
+        end
+        else begin
+            if (evt_start)
+                evt_active <= 'd1;
+            if (evt_counter == EVT_CNT_STOP)
+                evt_active <= 1'b0;
+        end
+    end
+
+ /******************************************************
+
+ FSM_0, Accept data from RX modules into DPRAM Step 0
+
+ ******************************************************/
+
+    // FSM_0: sequences the capture of data units (DUs) for one event.
+    //   IDLE      -> EVT_START : evt_start pulses and not every 'empty' bit is set
+    //   EVT_START -> DU_START  : not every 'empty' bit is set (a DU is available)
+    //   DU_START  -> DU_CONT   : always, after one clock
+    //   DU_CONT   -> DU_DONE   : d_i[8] (done flag) is set
+    //   DU_DONE   -> EVT_DONE  : every 'empty' bit is set
+    //   DU_DONE   -> DU_START  : otherwise (another DU is available)
+    //   EVT_DONE  -> IDLE      : always, after one clock
+    // Any other state encoding is held unchanged.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            mle_0_state <= MLE_ST_IDLE;
+        end
+        else begin
+            case (mle_0_state)
+                MLE_ST_IDLE: begin
+                    if (evt_start && !(&empty))
+                        mle_0_state <= MLE_ST_EVT_START;
+                end
+                MLE_ST_EVT_START: begin
+                    if (!(&empty))
+                        mle_0_state <= MLE_ST_DU_START;
+                end
+                MLE_ST_DU_START: begin
+                    mle_0_state <= MLE_ST_DU_CONT;
+                end
+                MLE_ST_DU_CONT: begin
+                    if (d_i[8])
+                        mle_0_state <= MLE_ST_DU_DONE;
+                end
+                MLE_ST_DU_DONE: begin
+                    if (&empty)
+                        mle_0_state <= MLE_ST_EVT_DONE;
+                    else
+                        mle_0_state <= MLE_ST_DU_START;
+                end
+                MLE_ST_EVT_DONE: begin
+                    mle_0_state <= MLE_ST_IDLE;
+                end
+                default: ;                    // hold
+            endcase
+        end
+    end
+
+
+    // Debug aid: decode mle_0_state into an ASCII label (up to 12 characters)
+    // for display in a waveform viewer.
+    // This block is purely combinational, so it uses blocking assignments;
+    // nonblocking assignments in an always @(*) block delay the update to the
+    // NBA region and can make simulation disagree with the synthesized logic.
+    always @(*) begin
+        case (mle_0_state)
+            MLE_ST_IDLE:      mle_0_state_str = "IDLE";
+            MLE_ST_START:     mle_0_state_str = "START";
+            MLE_ST_EVT_START: mle_0_state_str = "EVT_START";
+            MLE_ST_DU_START:  mle_0_state_str = "DU_START";
+            MLE_ST_DU_CONT:   mle_0_state_str = "DU_CONT";
+            MLE_ST_DU_DONE:   mle_0_state_str = "DU_DONE";
+            MLE_ST_EVT_DONE:  mle_0_state_str = "EVT_DONE";
+            default:          mle_0_state_str = "UNDEFINED";
+        endcase
+    end
+
+
+    // wr_block0: index of the dpram_s0 block currently being written.
+    // It is 1 after reset and advances by one (wrapping in 3 bits) on every
+    // evt_start pulse.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            wr_block0 <= 'd1;
+        end
+        else if (evt_start) begin
+            wr_block0 <= wr_block0 + 1'b1;
+        end
+    end
+
+    // wr_ptr0: write offset inside the current dpram_s0 block.
+    // Advances on each write strobe and is forced back to zero whenever no
+    // 'enable' bit is asserted (the rewind has priority over the increment).
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            wr_ptr0 <= 'd0;
+        end
+        else begin
+            if (we0)
+                wr_ptr0 <= wr_ptr0 + 1'b1;
+            if (~|enable)
+                wr_ptr0 <= 'd0;
+        end
+    end
+
+    // The module-level write strobe drives dpram_s0 directly; the write
+    // address is the block base (block index * 128) plus the in-block offset.
+    assign we0      = we;
+    assign wr_addr0 = wr_ptr0 + (wr_block0 << BLOCK_OFFSET_SHIT);
+
+    // enable_logic_active: selects which interface the enable logic may drive.
+    //   FSM_0 in IDLE      : all bits cleared
+    //   FSM_0 in EVT_START : bit 0 set (all others clear)
+    //   FSM_0 in DU_DONE   : shifted left by one, moving on to the next interface
+    //   any other state    : held
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            enable_logic_active <= 'd0;
+        end
+        else begin
+            case (mle_0_state)
+                MLE_ST_IDLE:      enable_logic_active <= 'd0;
+                MLE_ST_EVT_START: enable_logic_active <= 1'b1;
+                MLE_ST_DU_DONE:   enable_logic_active <= enable_logic_active << 1;
+                default: ;                    // hold
+            endcase
+        end
+    end
+
+    // Per-interface enable generation, one instance per upstream interface.
+    generate
+        genvar i;
+        for (i = 0; i < NUM_IF; i = i + 1) begin: enable_logic
+            // enable[i] is registered and is high on the clock after interface
+            // i is both selected by enable_logic_active and reporting not
+            // empty; in every other case it is low.
+            always @(posedge clk or negedge rstn) begin
+                if (!rstn) begin
+                    enable[i] <= 1'b0;
+                end
+                else begin
+                    enable[i] <= 1'b0;
+                    if (enable_logic_active[i] && !empty[i])
+                        enable[i] <= 1'b1;
+                end
+            end
+
+            // Two-stage registered copy of empty[i]; both stages reset high.
+            always @(posedge clk or negedge rstn) begin
+                if (!rstn)
+                    {empty_m2[i], empty_m1[i]} <= 2'b11;
+                else
+                    {empty_m2[i], empty_m1[i]} <= {empty_m1[i], empty[i]};
+            end
+        end
+    endgenerate
+
+    // cnt0: 7-bit counter intended to track words written into dpram_s0 during
+    // an event. It is held at zero while evt_active is low and otherwise
+    // advances on clocks where at least one 'enable' bit is set and no
+    // 'empty' bit is set.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            cnt0 <= 'd0;
+        end
+        else begin
+            if ((|enable) && (~|empty))
+                cnt0 <= cnt0 + 1'b1;
+            if (!evt_active)
+                cnt0 <= 'd0;                  // clear has priority
+        end
+    end
+
+
+ // Instantiate x9 DPRAM for Step 0 DPRAM
+ // Port A takes the incoming data (d_i) at wr_addr0, qualified by we0 and
+ // the external clock enable clk_e; its output is unused (a_oe low, a_dout
+ // unconnected). Port B is read-only here (b_we low) and returns d_s0_o from
+ // rd_addr0 under control of rd_oe0.
+ // NOTE(review): ADDR_WIDTH is hard-coded to 10 while the address wires are
+ // $clog2(DPRAM_DEPTH) wide; they only agree for DPRAM_DEPTH = 1024.
+ dpram_inf #(.ADDR_WIDTH(10),.DPRAM_INIT("mle_ram_0.txt")) dpram_s0(
+ .rstn(rstn),
+ .a_clk(clk),
+ .a_clk_e(clk_e),
+ .a_we(we0),
+ .a_oe(1'b0),
+ .a_addr(wr_addr0),
+ .a_din(d_i),
+ .a_dout(),
+ // port B
+ .b_clk(clk),
+ .b_clk_e(1'b1),
+ .b_we(1'b0),
+ .b_oe(rd_oe0),
+ .b_addr(rd_addr0),
+ .b_din(9'h0),
+ .b_dout(d_s0_o)
+ );
+
+`ifdef DIRECT_OUTPUT
+
+    // Direct-output wiring: the stage-1 signals are aliased onto stage 0 so
+    // the switch reads straight out of dpram_s0.
+    assign we1      = we0;
+    assign wr_addr1 = wr_addr0;
+    assign rd_addr0 = rd_addr1;
+    assign rd_oe0   = fifo_re;
+    assign fifo_d   = d_s0_o;
+
+    // Output word: bit 8 mirrors fifo_empty_o; the data byte is forced to
+    // zero while fifo_empty is set.
+    // NOTE(review): the two halves are gated by different flags
+    // (fifo_empty_o vs fifo_empty) - confirm this is intentional.
+    assign fifo_d_o = {fifo_empty_o, (fifo_empty ? 8'h00 : d_s0_o[7:0])};
+
+`else
+
+
+ /******************************************************
+
+ FSM_1: Read data from DPRAM Step 0.
+ read and store code. FSM logic/path may depend on code
+ Do I need a size field?
+
+ ******************************************************/
+
+    // evt_delay_1: registered one-clock pulse raised for the clock following
+    // evt_counter == EVT_CNT_DELAY_1.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            evt_delay_1 <= 'd0;
+        end
+        else begin
+            evt_delay_1 <= 1'b0;
+            if (evt_counter == EVT_CNT_DELAY_1)
+                evt_delay_1 <= 1'b1;
+        end
+    end
+
+    // FSM_1: walks the data units read back from dpram_s0. It only advances
+    // while evt_active is high; otherwise the state is frozen.
+    //   IDLE     -> DU_START : evt_delay_1 pulses
+    //   DU_START -> DU_CONT  : always, after one clock
+    //   DU_CONT  -> DU_DONE  : d_s0_o[8] is set
+    //   DU_DONE  -> IDLE     : d_s0_o[8] is set
+    //   DU_DONE  -> DU_START : otherwise
+    // Any other state encoding is held unchanged.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            mle_1_state <= MLE_ST_IDLE;
+        end
+        else if (evt_active) begin
+            case (mle_1_state)
+                MLE_ST_IDLE: begin
+                    if (evt_delay_1)
+                        mle_1_state <= MLE_ST_DU_START;
+                end
+                MLE_ST_DU_START: begin
+                    mle_1_state <= MLE_ST_DU_CONT;
+                end
+                MLE_ST_DU_CONT: begin
+                    if (d_s0_o[8])
+                        mle_1_state <= MLE_ST_DU_DONE;
+                end
+                MLE_ST_DU_DONE: begin
+                    if (d_s0_o[8])
+                        mle_1_state <= MLE_ST_IDLE;
+                    else
+                        mle_1_state <= MLE_ST_DU_START;
+                end
+                default: ;                    // hold
+            endcase
+        end
+    end
+
+    // rd_block0: dpram_s0 block selected for reading. It is 0 after reset and
+    // copies wr_block0 whenever evt_delay_1 pulses.
+    always @(posedge clk or negedge rstn)
+        if (!rstn)
+            rd_block0 <= 'd0;
+        else if (evt_delay_1)
+            rd_block0 <= wr_block0;
+
+    // rd_ptr0: dpram_s0 read pointer. It increments on every clock and wraps
+    // in 7 bits.
+    always @(posedge clk or negedge rstn)
+        if (!rstn)
+            rd_ptr0 <= 'd0;
+        else
+            rd_ptr0 <= rd_ptr0 + 1'b1;
+
+    // Read address = block base (block index * 128) + in-block offset.
+    // The parentheses are required: binary '+' binds tighter than '<<', so
+    // without them the expression is rd_block0 << (BLOCK_OFFSET_SHIT + rd_ptr0).
+    assign rd_addr0 = (rd_block0 << BLOCK_OFFSET_SHIT) + rd_ptr0;
+
+    // rd_oe0: the dpram_s0 read port is permanently enabled on this path.
+    assign rd_oe0 = 1'b1;
+
+    // wr_block1: dpram_s1 block selected for writing. It is 1 after reset and
+    // copies wr_block0 whenever evt_delay_2 pulses.
+    always @(posedge clk or negedge rstn)
+        if (!rstn)
+            wr_block1 <= 'd1;
+        else if (evt_delay_2)
+            wr_block1 <= wr_block0;
+
+    // wr_ptr1: dpram_s1 write offset.
+    // NOTE(review): the increment condition is a 1'b0 placeholder, so the
+    // pointer currently never moves off zero.
+    always @(posedge clk or negedge rstn)
+        if (!rstn)
+            wr_ptr1 <= 'd0;
+        else if (1'b0)
+            wr_ptr1 <= wr_ptr1 + 1'b1;
+
+    // Write address = block base (block index * 128) + in-block offset.
+    // The parentheses are required: binary '+' binds tighter than '<<', so
+    // without them the expression is wr_block1 << (BLOCK_OFFSET_SHIT + wr_ptr1).
+    assign wr_addr1 = (wr_block1 << BLOCK_OFFSET_SHIT) + wr_ptr1;
+
+
+
+    // we1: write enable for dpram_s1
+    // 'we1' is declared as a net, and a net cannot be the target of a
+    // procedural assignment, so the flop is kept in a local reg that drives
+    // the net through a continuous assignment.
+    // NOTE(review): the set condition is still a 1'b0 placeholder, so we1 is
+    // never asserted on this path.
+    reg we1_q;
+
+    always @(posedge clk or negedge rstn)
+        if (!rstn)
+            we1_q <= 1'b0;
+        else if (1'b0)
+            we1_q <= 1'b1;
+        else
+            we1_q <= 1'b0;
+
+    assign we1 = we1_q;
+
+
+
+ // Instantiate 1k x 9 DPRAM
+ // dpram_s1: port B interfaces with Switch
+ // Port A writes d_i_internal at wr_addr1 when we1 is high; its output is
+ // unused (a_oe low, a_dout unconnected). Port B is read-only here (b_we
+ // low): fifo_re acts as its output enable and fifo_d is the data read from
+ // rd_addr1.
+ // NOTE(review): d_i_internal has no driver in this file, and ADDR_WIDTH is
+ // hard-coded to 10 while the address wires are $clog2(DPRAM_DEPTH) wide.
+ dpram_inf #(.ADDR_WIDTH(10), .DPRAM_INIT("mle_ram_1.txt")) dpram_s1(
+ .rstn(rstn),
+ .a_clk(clk),
+ .a_clk_e(1'b1),
+ .a_we(we1),
+ .a_oe(1'b0),
+ .a_addr(wr_addr1),
+ .a_din(d_i_internal),
+ .a_dout(),
+ // port B
+ .b_clk(clk),
+ .b_clk_e(1'b1),
+ .b_we(1'b0),
+ .b_oe(fifo_re),
+ .b_addr(rd_addr1),
+ .b_din(9'h0),
+ .b_dout(fifo_d)
+ );
+
+ /******************************************************
+
+ FSM_2, Transfer data from Processing to DPRAM Out
+
+ ******************************************************/
+
+    // evt_delay_2: registered one-clock pulse raised for the clock following
+    // evt_counter == EVT_CNT_DELAY_2.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            evt_delay_2 <= 'd0;
+        end
+        else begin
+            evt_delay_2 <= 1'b0;
+            if (evt_counter == EVT_CNT_DELAY_2)
+                evt_delay_2 <= 1'b1;
+        end
+    end
+
+
+    // FSM_2: placeholder sequencer for the processing-to-output transfer.
+    // While evt_active is high it steps
+    //   EVT_START -> DU_START -> DU_CONT -> DU_DONE -> EVT_DONE -> IDLE
+    // one state per clock; any other encoding is held.
+    // NOTE(review): there is no item for MLE_ST_IDLE, and reset loads
+    // MLE_ST_IDLE, so as written this FSM never leaves its reset state.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            mle_2_state <= MLE_ST_IDLE;
+        end
+        else if (evt_active) begin
+            case (mle_2_state)
+                MLE_ST_EVT_START: mle_2_state <= MLE_ST_DU_START;
+                MLE_ST_DU_START:  mle_2_state <= MLE_ST_DU_CONT;
+                MLE_ST_DU_CONT:   mle_2_state <= MLE_ST_DU_DONE;
+                MLE_ST_DU_DONE:   mle_2_state <= MLE_ST_EVT_DONE;
+                MLE_ST_EVT_DONE:  mle_2_state <= MLE_ST_IDLE;
+                default: ;                    // hold
+            endcase
+        end
+    end
+
+
+ /******************************************************
+
+ Output, Switch reads from dpram_s1
+
+ ******************************************************/
+
+
+`endif
+
+    // d_out_avail: remembers that something was written (we1 seen) since the
+    // last output trigger. Set by we1, cleared by evt_delay_out; the clear
+    // wins if both occur on the same clock.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            d_out_avail <= 1'b0;
+        end
+        else begin
+            if (we1)
+                d_out_avail <= 1'b1;
+            if (evt_delay_out)
+                d_out_avail <= 1'b0;
+        end
+    end
+
+    // evt_delay_out: registered one-clock pulse raised for the clock following
+    // evt_counter == EVT_CNT_OUT, but only when d_out_avail is set.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            evt_delay_out <= 'd0;
+        end
+        else begin
+            evt_delay_out <= 1'b0;
+            if (d_out_avail && (evt_counter == EVT_CNT_OUT))
+                evt_delay_out <= 1'b1;
+        end
+    end
+
+    // fifo_d_out_flag: registered copy of fifo_d[8], captured only on clocks
+    // where fifo_re is asserted; it is low on every other clock.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            fifo_d_out_flag <= 1'b0;
+        end
+        else begin
+            fifo_d_out_flag <= 1'b0;
+            if (fifo_re)
+                fifo_d_out_flag <= fifo_d[8];
+        end
+    end
+
+    // byte_cnt: the reported size is fixed at one full block (BLOCK_OFFSET).
+    assign byte_cnt = BLOCK_OFFSET;
+
+
+    // fifo_empty: internal empty flag. High after reset, cleared by
+    // evt_delay_out and set again when fifo_d_out_flag is seen; the set wins
+    // if both occur on the same clock.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            fifo_empty <= 1'b1;
+        end
+        else begin
+            if (evt_delay_out)
+                fifo_empty <= 1'b0;
+            if (fifo_d_out_flag)
+                fifo_empty <= 1'b1;
+        end
+    end
+
+
+    // True while the read address points at the last entry (byte_cnt - 1) of
+    // the block selected by rd_block1.
+    wire rd_at_block_end =
+        (rd_addr1 == (rd_block1 << BLOCK_OFFSET_SHIT) + (byte_cnt-1));
+
+    // fifo_empty_o: module output telling the reader to stop. High after
+    // reset, cleared by evt_delay_out and set when the read address reaches
+    // the end of the block; the set wins if both occur on the same clock.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            fifo_empty_o <= 1'b1;
+        end
+        else begin
+            if (evt_delay_out)
+                fifo_empty_o <= 1'b0;
+            if (rd_at_block_end)
+                fifo_empty_o <= 1'b1;
+        end
+    end
+
+
+    // rd_block1: block selected for the switch-side read. It is 0 after reset
+    // and copies wr_block0 whenever evt_delay_out pulses.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            rd_block1 <= 'd0;
+        end
+        else if (evt_delay_out) begin
+            rd_block1 <= wr_block0;
+        end
+    end
+
+    // rd_ptr1: switch-side read offset inside the selected block.
+    // Advances on each fifo_re and is rewound to zero by evt_delay_out (the
+    // rewind has priority over the increment).
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn) begin
+            rd_ptr1 <= 'd0;
+        end
+        else begin
+            if (fifo_re)
+                rd_ptr1 <= rd_ptr1 + 1'b1;
+            if (evt_delay_out)
+                rd_ptr1 <= 'd0;
+        end
+    end
+
+    // Read address = block base (block index * 128) + in-block offset.
+    assign rd_addr1 = rd_ptr1 + (rd_block1 << BLOCK_OFFSET_SHIT);
+
+    // pkt_sz: intended to hold the number of bytes in dpram_s1 for use as the
+    // packet size. Only the reset value is ever loaded; with no other
+    // assignment the register simply keeps its value on every clock.
+    always @(posedge clk or negedge rstn) begin
+        if (!rstn)
+            pkt_sz <= 'd0;
+    end
+
+
+
+endmodule

Highly Recommended Verilog Books