`include "lr_acc.svh"

// -----------------------------------------------------------------------------
// lr_acc_512_8b_wrapper
//
// Self-contained driver around one lr_acc_512_8b instance. On `trigger` it:
//   1. writes 512 words to addresses 0..511 (word i carries (i+1)[3:0] in
//      bits [7:4] and i[3:0] in bits [3:0]),
//   2. writes 1 to address 10'h100 to start the computation,
//   3. polls address 10'h000 until it reads back 32'd1,
//   4. reads addresses 1..3 and publishes them on out_result_0..2 together
//      with a one-cycle `done_flag` pulse.
//
// All bus outputs are registered, so a value assigned in cycle T is seen by the
// DUT in cycle T+1. Read data is therefore only sampled after the address has
// been held for READ_LATENCY + 1 cycles.
//
// Parameters:
//   READ_LATENCY - clock cycles the DUT needs between seeing a read address
//                  and presenting valid `readdata` (0 = combinational read).
//                  Because the address is held stable while waiting, any value
//                  >= the real latency is safe.
//                  NOTE(review): the DUT is not visible from this file; the
//                  default of 1 is an assumption - confirm against
//                  lr_acc_512_8b.
//
// NOTE(review): the start address 10'h100 and the done address 10'h000 overlap
// the 0..511 data-load range; presumably the DUT decodes them differently for
// this access pattern - confirm against the DUT register map.
// NOTE(review): if the DUT keeps its done word at 1 from a previous run until
// some cycles after the start write, the poll can complete early - confirm.
// -----------------------------------------------------------------------------
module lr_acc_512_8b_wrapper #(
    parameter int unsigned READ_LATENCY = 1
) (
    input  logic        clk,
    input  logic        reset,              // asynchronous, active high
    input  logic        trigger,            // sampled in IDLE only; starts one run
    output logic        done_flag,          // one-cycle pulse when results are valid
    output logic [31:0] out_result_0,       // s1s4 - s2s2 (word read from address 1)
    output logic [31:0] out_result_1,       // s3s4 - s2s5 (word read from address 2)
    output logic [31:0] out_result_2        // s1s5 - s2s3 (word read from address 3)
);

    // Address map used by this wrapper.
    localparam logic [9:0] LAST_LOAD_INDEX   = 10'd511;
    localparam logic [9:0] START_ADDR        = 10'h100;
    localparam logic [9:0] DONE_ADDR         = 10'h000;
    localparam logic [9:0] FIRST_RESULT_ADDR = 10'd1;
    localparam logic [9:0] LAST_RESULT_ADDR  = 10'd3;

    // Cycles a read address must be driven by the FSM before readdata may be
    // sampled: one cycle for the wrapper's own address register plus the DUT
    // read latency.
    localparam int unsigned SETTLE_CYCLES = READ_LATENCY + 1;
    localparam int unsigned WAIT_W        = $clog2(SETTLE_CYCLES + 1);

    // Internal signals to DUT
    logic [31:0] writedata;
    logic        write;
    logic        read;
    logic        chipselect;
    logic [9:0]  address;
    logic [31:0] readdata;

    // DUT instance
    lr_acc_512_8b dut (
        .clk        (clk),
        .reset      (reset),
        .writedata  (writedata),
        .write      (write),
        .read       (read),
        .chipselect (chipselect),
        .address    (address),
        .readdata   (readdata)
    );

    // State Machine
    typedef enum logic [2:0] {
        IDLE,
        LOAD_DATA,
        START_COMPUTE,
        WAIT_DONE,
        READ_RESULTS
    } state_t;

    state_t state;

    // Doubles as the load word index (0..511) and the result address (1..3).
    logic [9:0] load_index;
    logic [9:0] load_index_next;

    // Counts how long the current read address has been driven.
    logic [WAIT_W-1:0] wait_cnt;

    // Holds results 0 and 1 until result 2 arrives so that all three outputs
    // update in the same cycle as done_flag.
    logic [31:0] result_buffer [0:1];

    // A part-select cannot be applied to a parenthesised expression, so the
    // incremented index gets its own net.
    assign load_index_next = load_index + 10'd1;

    // Main wrapper logic
    always_ff @(posedge clk or posedge reset) begin
        if (reset) begin
            state        <= IDLE;
            chipselect   <= 1'b0;
            write        <= 1'b0;
            read         <= 1'b0;
            load_index   <= '0;
            wait_cnt     <= '0;
            writedata    <= '0;
            address      <= '0;
            done_flag    <= 1'b0;
            out_result_0 <= '0;
            out_result_1 <= '0;
            out_result_2 <= '0;
        end else begin
            case (state)
                IDLE: begin
                    // Release the bus between runs so no stray read/write
                    // strobe is left asserted from the previous run.
                    done_flag  <= 1'b0;
                    chipselect <= 1'b0;
                    write      <= 1'b0;
                    read       <= 1'b0;
                    if (trigger) begin
                        load_index <= '0;
                        state      <= LOAD_DATA;
                    end
                end

                LOAD_DATA: begin
                    // One write per cycle, addresses 0..511.
                    chipselect <= 1'b1;
                    write      <= 1'b1;
                    read       <= 1'b0;
                    address    <= {1'b0, load_index[8:0]};
                    writedata  <= {24'd0, load_index_next[3:0], load_index[3:0]};
                    load_index <= load_index_next;
                    if (load_index == LAST_LOAD_INDEX)
                        state <= START_COMPUTE;
                end

                START_COMPUTE: begin
                    chipselect <= 1'b1;
                    write      <= 1'b1;
                    read       <= 1'b0;
                    address    <= START_ADDR; // start trigger
                    writedata  <= 32'd1;
                    wait_cnt   <= '0;
                    state      <= WAIT_DONE;
                end

                WAIT_DONE: begin
                    chipselect <= 1'b1;
                    read       <= 1'b1;
                    write      <= 1'b0;
                    address    <= DONE_ADDR; // master_done address
                    if (wait_cnt != SETTLE_CYCLES) begin
                        // readdata does not yet correspond to DONE_ADDR.
                        wait_cnt <= wait_cnt + 1'b1;
                    end else if (readdata == 32'd1) begin
                        // The address is held, so every later cycle is a valid
                        // poll; wait_cnt stays saturated until done is seen.
                        load_index <= FIRST_RESULT_ADDR;
                        wait_cnt   <= '0;
                        state      <= READ_RESULTS;
                    end
                end

                READ_RESULTS: begin
                    chipselect <= 1'b1;
                    read       <= 1'b1;
                    write      <= 1'b0;
                    address    <= load_index;
                    if (wait_cnt != SETTLE_CYCLES) begin
                        wait_cnt <= wait_cnt + 1'b1;
                    end else begin
                        wait_cnt <= '0;
                        if (load_index == LAST_RESULT_ADDR) begin
                            // The last word is forwarded straight from
                            // readdata: a buffered copy would not be visible
                            // until the next clock edge.
                            out_result_0 <= result_buffer[0];
                            out_result_1 <= result_buffer[1];
                            out_result_2 <= readdata;
                            done_flag    <= 1'b1;
                            state        <= IDLE;
                        end else begin
                            result_buffer[load_index - FIRST_RESULT_ADDR] <= readdata;
                            load_index <= load_index_next;
                        end
                    end
                end

                default: begin
                    // Unused encodings of the 3-bit state recover to IDLE.
                    state <= IDLE;
                end
            endcase
        end
    end

endmodule
