`include "lr_acc.svh"


// lr_acc_512_8b: bus-accessible accumulator front end.
// Samples are written into mem_bank over the register interface, processed
// by the lr_acc_8b lanes, and the combined results are read back through
// readdata.
// NOTE(review): the port names follow the Avalon-MM slave convention -
// confirm against the system integration.
module lr_acc_512_8b (
    // Clock and synchronous reset
    input  logic        clk,
    input  logic        reset,

    // Register-access bus
    input  logic [31:0] writedata,
    input  logic        write,
    input  logic        read,
    input  logic        chipselect,
    input  logic [9:0]  address,
    output logic [31:0] readdata
  );

  // Sample storage and per-lane bookkeeping; the memory and pc_tracker
  // types are expected to come from lr_acc.svh.
  memory     [`DATA_SET_SIZE-1:0] mem_bank;
  pc_tracker [`PIPELINE_SETS-1:0] pc_status_bank;

  // Run control and completion flags
  logic                          go;
  logic [`ADDRESS_BITS_SIZE-1:0] pc;
  logic                          done;
  logic                          done_trigger;
  logic                          master_done;

  // s_1 as latched from the bus, followed by the five combined sums
  logic [`MULT_DATA_IN_LEN-1:0] local_s_1;
  logic [`MULT_DATA_IN_LEN-1:0] global_s_1;
  logic [`MULT_DATA_IN_LEN-1:0] global_s_2;
  logic [`MULT_DATA_IN_LEN-1:0] global_s_3;
  logic [`MULT_DATA_IN_LEN-1:0] global_s_4;
  logic [`MULT_DATA_IN_LEN-1:0] global_s_5;

  // Difference-of-products terms exposed at read addresses 1..3
  logic [`OUTPUT_DATA_LEN-1:0] s1s4_minus_s2s2;
  logic [`OUTPUT_DATA_LEN-1:0] s3s4_minus_s2s5;
  logic [`OUTPUT_DATA_LEN-1:0] s1s5_minus_s2s3;

  // Bus register interface and completion sequencing.
  //
  // Writes (chipselect && write), selected by address[`ADDRESS_BITS_SIZE]:
  //   0 : while go is low, store writedata[3:0] / writedata[7:4] into the
  //       x / y fields of mem_bank[address[8:0]].
  //   1 : address[8:0] == 0 clears the control flags, local_s_1 and the whole
  //       sample memory; any other value sets go and latches address[8:0]
  //       into local_s_1.
  // Reads (chipselect && read, decoded on the full 10-bit address):
  //   0 -> master_done, 1..3 -> the three difference terms,
  //   4..8 -> global_s_1..global_s_5. Other addresses leave readdata as is.
  //
  // Completion: pc == 496 drops go and raises done; done hands over to
  // done_trigger and done_trigger to master_done, one clock apart.
  always_ff @(posedge clk)
  begin
    if (reset)
    begin
      // Reset overrides everything in this process, including the
      // completion sequencing below, so no stale done/master_done state
      // can survive a reset.
      go <= 1'b0;
      master_done <= '0;
      done <= '0;
      done_trigger <= '0;
      local_s_1 <= '0;
      readdata <= '0;
      // global_s_1..5 and the s*_minus_* terms are driven elsewhere in this
      // module and are therefore not touched here.
      for (int i = 0; i < `DATA_SET_SIZE; i++)
      begin
        mem_bank[i].x <= '0;
        mem_bank[i].y <= '0;
      end
    end
    else
    begin
      if (chipselect && write)
      begin
        case (address[`ADDRESS_BITS_SIZE])
          1'h0:
          begin
            // Sample load; ignored while a run is in progress.
            if (!go)
            begin
              mem_bank[address[8:0]].x <= writedata[3:0];
              mem_bank[address[8:0]].y <= writedata[7:4];
            end
          end
          1'h1:
          begin
            if (address[8:0] == 0)
            begin
              // Software clear: same register set as the hardware reset,
              // except that readdata is left alone.
              go <= 1'b0;
              master_done <= '0;
              done <= '0;
              done_trigger <= '0;
              local_s_1 <= '0;
              for (int i = 0; i < `DATA_SET_SIZE; i++)
              begin
                mem_bank[i].x <= '0;
                mem_bank[i].y <= '0;
              end
            end
            else
            begin
              // Start a run; the low address bits are captured as s_1.
              go <= 1'b1;
              local_s_1 <= address[8:0];
            end
          end
        endcase
      end
      else if (chipselect && read)
      begin
        case (address)
          10'h0:
          begin
            readdata <= master_done;
          end
          10'h1:
          begin
            readdata <= s1s4_minus_s2s2;
          end
          10'h2:
          begin
            readdata <= s3s4_minus_s2s5;
          end
          10'h3:
          begin
            readdata <= s1s5_minus_s2s3;
          end
          10'h4:
          begin
            readdata <= global_s_1;
          end
          10'h5:
          begin
            readdata <= global_s_2;
          end
          10'h6:
          begin
            readdata <= global_s_3;
          end
          10'h7:
          begin
            readdata <= global_s_4;
          end
          10'h8:
          begin
            readdata <= global_s_5;
          end
        endcase
      end

      // Completion sequencing. It stays after the bus handling so that its
      // assignments still take precedence over a bus write arriving in the
      // same clock cycle.
      if (done)
      begin
        done_trigger <= '1;
        done <= '0;
      end
      else if (done_trigger)
      begin
        master_done <= '1;
        done_trigger <= '0;
      end
      else if (pc == 496)
      begin
        // NOTE(review): 496 looks like the start of the last group of 16
        // samples in a 512-entry set - confirm against `DATA_SET_SIZE and
        // `PIPELINE_SETS.
        go <= '0;
        done <= '1;
      end
    end
  end

  // Per-lane handshake bits, one bit per lr_acc_8b instance.
  // local_done is driven by the lanes but not read anywhere in this module.
  logic [`PIPELINE_SETS-1:0]    start;
  logic [`PIPELINE_SETS-1:0]    local_done;

  // Per-lane operands fed to the lr_acc_8b instances.
  logic [`SPLIT_DATA_LEN-1:0]   x             [`PIPELINE_SETS-1:0];
  logic [`SPLIT_DATA_LEN-1:0]   y             [`PIPELINE_SETS-1:0];

  // Per-lane results returned by the lr_acc_8b instances.
  logic [`MULT_DATA_IN_LEN-1:0] sum_x         [`PIPELINE_SETS-1:0];
  logic [`MULT_DATA_IN_LEN-1:0] sum_y         [`PIPELINE_SETS-1:0];
  logic [`MULT_DATA_IN_LEN-1:0] sum_x_squared [`PIPELINE_SETS-1:0];
  logic [`MULT_DATA_IN_LEN-1:0] sum_xy        [`PIPELINE_SETS-1:0];

  // Declared but not referenced elsewhere in this module.
  logic [`PC_BITS_SIZE-1:0]     local_pc      [`PIPELINE_SETS-1:0];
  logic [`PC_BITS_SIZE-1:0]     dut_num;

  // Written by the dispatch process but never read in this module.
  logic                          master_start;
  logic [`ADDRESS_BITS_SIZE-1:0] addr;

  // One lr_acc_8b instance per pipeline set. Lane n is fed from start[n],
  // x[n] and y[n] and reports its four sums plus a done flag.
  // The hierarchical name stays lr_acc_gen[n].lr_acc_8b_0.
  generate
    for (genvar lane = 0; lane < `PIPELINE_SETS; lane++)
    begin : lr_acc_gen
      lr_acc_8b lr_acc_8b_0 (
        .clk           (clk),
        .reset         (reset),
        .start         (start[lane]),
        .x             (x[lane]),
        .y             (y[lane]),
        .sum_x         (sum_x[lane]),
        .sum_y         (sum_y[lane]),
        .sum_x_squared (sum_x_squared[lane]),
        .sum_xy        (sum_xy[lane]),
        .done          (local_done[lane])
      );
    end
  endgenerate

  logic global_start_adders;

  // Lane dispatch and result capture.
  //
  // Both jobs live in one process so that every register, and in particular
  // pc_status_bank, has exactly one writer: a variable assigned in an
  // always_ff must not be written by any other process.
  //
  // While go is high, each clock hands the samples mem_bank[pc + i] to
  // lane i, raises its start bit, records pc + i in that lane's tracker
  // entry and advances pc by 16. While go is low the start bits are cleared.
  // While master_done is high, the per-lane sums are copied into
  // pc_status_bank, local_s_1 is copied into global_s_1 and the adder start
  // flag is raised.
  always_ff @( posedge clk )
  begin
    if (reset)
    begin
      pc <= '0;
      master_start <= '0;
      addr <= '0;
      // global_s_1 is owned by this process, so this is where it is reset.
      global_s_1 <= '0;
      global_start_adders <= '0;
      for (int i = 0; i < `PIPELINE_SETS; i++)
      begin
        start[i] <= '0;
        x[i] <= '0;
        y[i] <= '0;
        pc_status_bank[i].pc <= '0;
        pc_status_bank[i].dut_num <= '0;
        pc_status_bank[i].done <= '0;
        pc_status_bank[i].local_s_2 <= '0;
        pc_status_bank[i].local_s_3 <= '0;
        pc_status_bank[i].local_s_4 <= '0;
        pc_status_bank[i].local_s_5 <= '0;
      end
    end
    else
    begin
      if (go)
      begin
        for (int i = 0; i < `PIPELINE_SETS; i++)
        begin
          // addr is rewritten on every iteration; the last one wins.
          addr <= pc + i;
          x[i] <= mem_bank[pc + i].x;
          y[i] <= mem_bank[pc + i].y;
          start[i] <= '1;
          pc_status_bank[i].pc <= pc + i;
          pc_status_bank[i].dut_num <= pc + i;
          pc_status_bank[i].done <= '0;
        end
        // NOTE(review): 16 and 512 are presumably `PIPELINE_SETS and
        // `DATA_SET_SIZE - confirm before replacing the literals.
        if (pc < 512)
        begin
          pc <= pc + 16;
        end
        else
        begin
          pc <= '0;
        end
        master_start <= '0;
      end
      else if (!go)
      begin
        for (int i = 0; i < `PIPELINE_SETS; i++)
        begin
          start[i] <= '0;
        end
      end

      // Result capture. Placed after the dispatch so that, should go and
      // master_done ever be high together, the tracker done bit resolves
      // deterministically to 1.
      if (master_done)
      begin
        for (int i = 0; i < `PIPELINE_SETS; i++)
        begin
          pc_status_bank[i].done <= '1;
          pc_status_bank[i].local_s_2 <= sum_x[i];
          pc_status_bank[i].local_s_3 <= sum_y[i];
          pc_status_bank[i].local_s_4 <= sum_x_squared[i];
          pc_status_bank[i].local_s_5 <= sum_xy[i];
        end
        global_s_1 <= local_s_1;
        global_start_adders <= '1;
      end
    end
  end


  // Combines the sixteen per-lane local_s_2 values (captured from sum_x)
  // into global_s_2. comb_adder_18b is defined elsewhere; presumably it adds
  // op0..opF once adder_start is raised - confirm against its source.
  comb_adder_18b sum_s2_calc (
    .clk         (clk),
    .reset       (reset),
    .adder_start (global_start_adders),
    .op0 (pc_status_bank[ 0].local_s_2),  .op1 (pc_status_bank[ 1].local_s_2),
    .op2 (pc_status_bank[ 2].local_s_2),  .op3 (pc_status_bank[ 3].local_s_2),
    .op4 (pc_status_bank[ 4].local_s_2),  .op5 (pc_status_bank[ 5].local_s_2),
    .op6 (pc_status_bank[ 6].local_s_2),  .op7 (pc_status_bank[ 7].local_s_2),
    .op8 (pc_status_bank[ 8].local_s_2),  .op9 (pc_status_bank[ 9].local_s_2),
    .opA (pc_status_bank[10].local_s_2),  .opB (pc_status_bank[11].local_s_2),
    .opC (pc_status_bank[12].local_s_2),  .opD (pc_status_bank[13].local_s_2),
    .opE (pc_status_bank[14].local_s_2),  .opF (pc_status_bank[15].local_s_2),
    .result      (global_s_2)
  );

  // Combines the sixteen per-lane local_s_3 values (captured from sum_y)
  // into global_s_3. comb_adder_18b is defined elsewhere; presumably it adds
  // op0..opF once adder_start is raised - confirm against its source.
  comb_adder_18b sum_s3_calc (
    .clk         (clk),
    .reset       (reset),
    .adder_start (global_start_adders),
    .op0 (pc_status_bank[ 0].local_s_3),  .op1 (pc_status_bank[ 1].local_s_3),
    .op2 (pc_status_bank[ 2].local_s_3),  .op3 (pc_status_bank[ 3].local_s_3),
    .op4 (pc_status_bank[ 4].local_s_3),  .op5 (pc_status_bank[ 5].local_s_3),
    .op6 (pc_status_bank[ 6].local_s_3),  .op7 (pc_status_bank[ 7].local_s_3),
    .op8 (pc_status_bank[ 8].local_s_3),  .op9 (pc_status_bank[ 9].local_s_3),
    .opA (pc_status_bank[10].local_s_3),  .opB (pc_status_bank[11].local_s_3),
    .opC (pc_status_bank[12].local_s_3),  .opD (pc_status_bank[13].local_s_3),
    .opE (pc_status_bank[14].local_s_3),  .opF (pc_status_bank[15].local_s_3),
    .result      (global_s_3)
  );
  // Combines the sixteen per-lane local_s_4 values (captured from
  // sum_x_squared) into global_s_4. comb_adder_18b is defined elsewhere;
  // presumably it adds op0..opF once adder_start is raised - confirm
  // against its source.
  comb_adder_18b sum_s4_calc (
    .clk         (clk),
    .reset       (reset),
    .adder_start (global_start_adders),
    .op0 (pc_status_bank[ 0].local_s_4),  .op1 (pc_status_bank[ 1].local_s_4),
    .op2 (pc_status_bank[ 2].local_s_4),  .op3 (pc_status_bank[ 3].local_s_4),
    .op4 (pc_status_bank[ 4].local_s_4),  .op5 (pc_status_bank[ 5].local_s_4),
    .op6 (pc_status_bank[ 6].local_s_4),  .op7 (pc_status_bank[ 7].local_s_4),
    .op8 (pc_status_bank[ 8].local_s_4),  .op9 (pc_status_bank[ 9].local_s_4),
    .opA (pc_status_bank[10].local_s_4),  .opB (pc_status_bank[11].local_s_4),
    .opC (pc_status_bank[12].local_s_4),  .opD (pc_status_bank[13].local_s_4),
    .opE (pc_status_bank[14].local_s_4),  .opF (pc_status_bank[15].local_s_4),
    .result      (global_s_4)
  );

  // Combines the sixteen per-lane local_s_5 values (captured from sum_xy)
  // into global_s_5. comb_adder_18b is defined elsewhere; presumably it adds
  // op0..opF once adder_start is raised - confirm against its source.
  comb_adder_18b sum_s5_calc (
    .clk         (clk),
    .reset       (reset),
    .adder_start (global_start_adders),
    .op0 (pc_status_bank[ 0].local_s_5),  .op1 (pc_status_bank[ 1].local_s_5),
    .op2 (pc_status_bank[ 2].local_s_5),  .op3 (pc_status_bank[ 3].local_s_5),
    .op4 (pc_status_bank[ 4].local_s_5),  .op5 (pc_status_bank[ 5].local_s_5),
    .op6 (pc_status_bank[ 6].local_s_5),  .op7 (pc_status_bank[ 7].local_s_5),
    .op8 (pc_status_bank[ 8].local_s_5),  .op9 (pc_status_bank[ 9].local_s_5),
    .opA (pc_status_bank[10].local_s_5),  .opB (pc_status_bank[11].local_s_5),
    .opC (pc_status_bank[12].local_s_5),  .opD (pc_status_bank[13].local_s_5),
    .opE (pc_status_bank[14].local_s_5),  .opF (pc_status_bank[15].local_s_5),
    .result      (global_s_5)
  );

  // Register stage between the combined sums and the multiplier inputs:
  // each local_global_s_n follows global_s_n one clock later and is zeroed
  // by reset.
  logic [`MULT_DATA_IN_LEN-1:0] local_global_s_1;
  logic [`MULT_DATA_IN_LEN-1:0] local_global_s_2;
  logic [`MULT_DATA_IN_LEN-1:0] local_global_s_3;
  logic [`MULT_DATA_IN_LEN-1:0] local_global_s_4;
  logic [`MULT_DATA_IN_LEN-1:0] local_global_s_5;

  always_ff @( posedge clk )
  begin
    if (reset)
    begin
      {local_global_s_1, local_global_s_2, local_global_s_3,
       local_global_s_4, local_global_s_5} <= '0;
    end
    else
    begin
      // Both sides list the five equally wide fields in the same order,
      // so this is a field-by-field copy.
      {local_global_s_1, local_global_s_2, local_global_s_3,
       local_global_s_4, local_global_s_5}
        <= {global_s_1, global_s_2, global_s_3, global_s_4, global_s_5};
    end
  end

  // Raw outputs of the six multiplier instances. Intended products
  // (previously sketched here as combinational assigns on global_s_*):
  //   local_s1_times_s4 : s_1 * s_4
  //   local_s2_squared  : s_2 * s_2
  //   local_s3_times_s4 : s_3 * s_4
  //   local_s2_times_s5 : s_2 * s_5
  //   local_s1_times_s5 : s_1 * s_5
  //   local_s2_times_s3 : s_2 * s_3
  logic [`DSP_MULT_OUT_SIZE-1:0] local_s1_times_s4,
                                 local_s2_squared,
                                 local_s3_times_s4,
                                 local_s2_times_s5,
                                 local_s1_times_s5,
                                 local_s2_times_s3;

  // m1: X = registered s_1, Y = registered s_4, P -> local_s1_times_s4.
  // I1 and I2 are left unconnected. multiplier is defined elsewhere;
  // presumably P = X * Y - confirm against its source.
  multiplier m1 (
    .clk (clk),
    .X   (local_global_s_1),
    .Y   (local_global_s_4),
    .P   (local_s1_times_s4),
    .I1  (),
    .I2  ()
  );

  // m2: X = Y = registered s_2, P -> local_s2_squared.
  // I1 and I2 are left unconnected. multiplier is defined elsewhere;
  // presumably P = X * Y - confirm against its source.
  multiplier m2 (
    .clk (clk),
    .X   (local_global_s_2),
    .Y   (local_global_s_2),
    .P   (local_s2_squared),
    .I1  (),
    .I2  ()
  );

  // m3: X = registered s_3, Y = registered s_4, P -> local_s3_times_s4.
  // I1 and I2 are left unconnected. multiplier is defined elsewhere;
  // presumably P = X * Y - confirm against its source.
  multiplier m3 (
    .clk (clk),
    .X   (local_global_s_3),
    .Y   (local_global_s_4),
    .P   (local_s3_times_s4),
    .I1  (),
    .I2  ()
  );

  // m4: X = registered s_2, Y = registered s_5, P -> local_s2_times_s5.
  // I1 and I2 are left unconnected. multiplier is defined elsewhere;
  // presumably P = X * Y - confirm against its source.
  multiplier m4 (
    .clk (clk),
    .X   (local_global_s_2),
    .Y   (local_global_s_5),
    .P   (local_s2_times_s5),
    .I1  (),
    .I2  ()
  );

  // m5: X = registered s_1, Y = registered s_5, P -> local_s1_times_s5.
  // I1 and I2 are left unconnected. multiplier is defined elsewhere;
  // presumably P = X * Y - confirm against its source.
  multiplier m5 (
    .clk (clk),
    .X   (local_global_s_1),
    .Y   (local_global_s_5),
    .P   (local_s1_times_s5),
    .I1  (),
    .I2  ()
  );

  // m6: X = registered s_2, Y = registered s_3, P -> local_s2_times_s3.
  // I1 and I2 are left unconnected. multiplier is defined elsewhere;
  // presumably P = X * Y - confirm against its source.
  multiplier m6 (
    .clk (clk),
    .X   (local_global_s_2),
    .Y   (local_global_s_3),
    .P   (local_s2_times_s3),
    .I1  (),
    .I2  ()
  );

  // Register stage on the multiplier outputs: each product follows its
  // local_* counterpart one clock later and is zeroed by reset.
  logic [`DSP_MULT_OUT_SIZE-1:0] s1_times_s4,
                                 s2_squared,
                                 s3_times_s4,
                                 s2_times_s5,
                                 s1_times_s5,
                                 s2_times_s3;

  always_ff @( posedge clk )
  begin
    if (reset)
    begin
      {s1_times_s4, s2_squared, s3_times_s4,
       s2_times_s5, s1_times_s5, s2_times_s3} <= '0;
    end
    else
    begin
      // Both sides list the six equally wide fields in the same order,
      // so this is a field-by-field copy.
      {s1_times_s4, s2_squared, s3_times_s4,
       s2_times_s5, s1_times_s5, s2_times_s3}
        <= {local_s1_times_s4, local_s2_squared, local_s3_times_s4,
            local_s2_times_s5, local_s1_times_s5, local_s2_times_s3};
    end
  end

  // Combinational differences of the registered products; these are the
  // values returned at read addresses 1, 2 and 3.
  // NOTE(review): presumably the least-squares denominator, intercept
  // numerator and slope numerator, in that order - confirm with the
  // software side.
  always_comb
  begin
    s1s4_minus_s2s2 = s1_times_s4 - s2_squared;
    s3s4_minus_s2s5 = s3_times_s4 - s2_times_s5;
    s1s5_minus_s2s3 = s1_times_s5 - s2_times_s3;
  end

endmodule
