`include "lr_acc.svh"

// lr_acc_8b
//
// Pipelined accumulator that keeps four running sums over a stream of
// (x, y) samples: sum_x, sum_y, sum_x_squared and sum_xy.
//
// Ports
//   clk, reset     : clock and asynchronous, active-high reset. Reset clears
//                    every pipeline register and all four accumulators.
//   start          : sampled on the rising clock edge; when high, x and y are
//                    latched and one accumulation is launched.
//   x, y           : input samples, `SPLIT_DATA_LEN bits wide.
//   sum_x, sum_y,
//   sum_x_squared,
//   sum_xy         : current accumulator values, `MULT_DATA_IN_LEN bits wide.
//   done           : high for the cycle after the accumulators were updated.
//
// Valid/strobe chain (one register per arrow):
//   start -> local_start -> delay_reg -> local_local_start
//         -> start_next_stage -> sum_done (= done)
//
// NOTE(review): the two-register delay (delay_reg, local_local_start) is
// presumably sized to the latency of the external `multiplier` module, which
// is not visible here - confirm before changing either side.
module lr_acc_8b (
    input logic                             clk,
    input logic                             reset,
    input logic                             start,

    input logic [`SPLIT_DATA_LEN-1:0]       x,
    input logic [`SPLIT_DATA_LEN-1:0]       y,

    output logic [`MULT_DATA_IN_LEN-1:0]    sum_x,
    output logic [`MULT_DATA_IN_LEN-1:0]    sum_y,
    output logic [`MULT_DATA_IN_LEN-1:0]    sum_x_squared,
    output logic [`MULT_DATA_IN_LEN-1:0]    sum_xy,

    output logic                            done
);

    // ----------- Pipeline Stage 1: Register Inputs -------------
    // x and y are captured only while start is high and are held otherwise,
    // so the multipliers below see stable operands between samples.
    // local_start mirrors start with one cycle of delay.
    logic [`SPLIT_DATA_LEN-1:0] local_x, local_y;
    logic                       local_start;

    always_ff @(posedge clk or posedge reset) begin
        if (reset) begin
            local_x     <= '0;
            local_y     <= '0;
            local_start <= 1'b0;
        end else if (start) begin
            local_x     <= x;
            local_y     <= y;
            local_start <= 1'b1;
        end else begin
            local_start <= 1'b0;
        end
    end

    // ---------- Pipeline Stage 2: Multiplier -------------------
    // Two instances of the external `multiplier` module compute x*x and x*y.
    // NOTE(review): WIDTH is hard-coded to 4 - confirm it is consistent with
    // `SPLIT_DATA_LEN and `MULT_OUTPUT_SIZE_8b from lr_acc.svh.
    // NOTE(review): I1/I2 are presumably the multiplier's delayed copies of
    // its X/Y operands (they are accumulated as x and y below) - confirm
    // against the multiplier definition.
    logic [`MULT_OUTPUT_SIZE_8b-1:0] local_x_squared, local_x_times_y;
    logic [`SPLIT_DATA_LEN-1:0] inp1, inp2;

    // x * x; the I1/I2 outputs are intentionally left unconnected because
    // the x*y instance already provides them.
    multiplier #(.WIDTH(4)) mult_x_x (
        .clk(clk),
        .X(local_x),
        .Y(local_x),
        .I1(),
        .I2(),
        .P(local_x_squared)
    );

    // x * y; I1/I2 feed the sum_x / sum_y accumulators.
    multiplier #(.WIDTH(4)) mult_x_y (
        .clk(clk),
        .X(local_x),
        .Y(local_y),
        .I1(inp1),
        .I2(inp2),
        .P(local_x_times_y)
    );

    // ---------- Delay Stage to Capture Multiplier Outputs ------
    // Two-deep shift register on local_start: local_local_start goes high
    // two clock edges after local_start does.
    logic                          delay_reg;
    logic                          local_local_start;

    always_ff @(posedge clk or posedge reset) begin
        if (reset) begin
            delay_reg           <= 1'b0;
            local_local_start   <= 1'b0;
        end else begin
            delay_reg           <= local_start;
            local_local_start   <= delay_reg;
        end
    end

    // --------- Pipeline Register After Multipliers ------------
    // Captures the multiplier outputs when local_local_start is high and
    // raises start_next_stage for one cycle per captured sample. The data
    // registers hold their previous value when no capture takes place.
    logic [`MULT_OUTPUT_SIZE_8b-1:0] x_squared, x_times_y;
    logic [`SPLIT_DATA_LEN-1:0]      inp_x, inp_y;
    logic                            start_next_stage;

    always_ff @(posedge clk or posedge reset) begin
        if (reset) begin
            inp_x            <= '0;
            inp_y            <= '0;
            x_squared        <= '0;
            x_times_y        <= '0;
            start_next_stage <= 1'b0;
        end else if (local_local_start) begin
            x_squared        <= local_x_squared;
            x_times_y        <= local_x_times_y;
            inp_x            <= inp1;   // operand outputs of mult_x_y, not local_x/local_y
            inp_y            <= inp2;
            start_next_stage <= 1'b1;
        end else begin
            start_next_stage <= 1'b0;
        end
    end

    // ---------- Pipeline Stage 3: Accumulator ------------------
    // Adds the captured sample into the four running sums whenever
    // start_next_stage is high; sum_done flags the cycle in which the updated
    // sums become visible. The sums are cleared only by reset.
    logic [`MULT_DATA_IN_LEN-1:0] local_sum_x, local_sum_y;
    logic [`MULT_DATA_IN_LEN-1:0] local_sum_xx, local_sum_xy;
    logic                         sum_done;

    always_ff @(posedge clk or posedge reset) begin
        if (reset) begin
            local_sum_x  <= '0;
            local_sum_y  <= '0;
            local_sum_xx <= '0;
            local_sum_xy <= '0;
            sum_done     <= 1'b0;
        end else if (start_next_stage) begin
            local_sum_x  <= local_sum_x + inp_x;
            local_sum_y  <= local_sum_y + inp_y;
            local_sum_xx <= local_sum_xx + x_squared;
            local_sum_xy <= local_sum_xy + x_times_y;
            sum_done     <= 1'b1;
        end else begin
            sum_done <= 1'b0;
        end
    end

    // ------------------- Output Assignment ---------------------
    assign sum_x         = local_sum_x;
    assign sum_y         = local_sum_y;
    assign sum_x_squared = local_sum_xx;
    assign sum_xy        = local_sum_xy;
    assign done          = sum_done;

endmodule
