// Top-level wrapper around the dual-port memory `mem`.
//
// Port A of the memory is the software-facing side: it is addressed by `a`,
// written with `din` whenever both `chipselect` and `we` are high, and read
// back on `dout`. Port B is reserved for the CPU core.
//
// NOTE: the `cpu` instantiation is currently disabled (kept below as a
// comment), so nothing in this module drives `ram_we`, `bus_from_cpu` or
// `address_from_cpu`, and nothing consumes `bus_to_cpu`. The VGA_* outputs
// and `reset` are likewise not used anywhere in this module.
module processor(
              input logic         clk,
              input logic [11:0]  a,
              input logic         reset,
              input logic [7:0]   din,
              input logic         we,
              input               chipselect,
              output logic [7:0]  VGA_R, VGA_G, VGA_B,
              output logic        VGA_CLK, VGA_HS, VGA_VS,
                                  VGA_BLANK_n,
              output logic        VGA_SYNC_n,
              output logic [7:0]  dout);

    // CPU-side (memory port B) signals.
    logic [11:0] address_from_cpu;
    logic [7:0]  bus_from_cpu;
    logic [7:0]  bus_to_cpu;
    logic        ram_we;

    // Software-side (memory port A) write strobe: a write only happens
    // while this peripheral is selected.
    logic        sw_we;
    assign sw_we = chipselect & we;

    // Disabled CPU core instantiation (positional ports: clk, reset,
    // bus_from_ram, bus_to_ram, ram_we, address_to_ram):
    //
    // cpu  c (clk,
    //     reset,
    //     bus_to_cpu,
    //     bus_from_cpu,
    //     ram_we,
    //     address_from_cpu);

    mem m (
        .clock(clk),

        // Port A: software
        .address_a(a),
        .data_a(din),
        .wren_a(sw_we),
        .q_a(dout),

        // Port B: cpu
        .address_b(address_from_cpu),
        .data_b(bus_from_cpu),
        .wren_b(ram_we),
        .q_b(bus_to_cpu)
    );

endmodule


// Combinational ALU for the arithmetic and logical instructions (opcodes of
// the form 7x and Fx).
//
// Inputs:
//   clk         - unused; kept so the port list stays compatible with the
//                 `.*` connection used by the instantiating module.
//   D_register  - current accumulator value.
//   I_register  - high nibble of the opcode.
//   N_register  - low nibble of the opcode.
//   DF          - current carry / (inverted) borrow flag.
//   operand     - memory or immediate operand byte.
//
// Outputs:
//   result      - new accumulator value (D_register unchanged for non-ALU
//                 opcodes).
//   carry       - new DF value. For additions it is the carry out; for
//                 subtractions it is 1 when NO borrow occurred; for logical
//                 operations and non-ALU opcodes it is DF unchanged.
//   out_rdy     - 1 when {I_register, N_register} is an ALU opcode and
//                 result/carry should be written back, 0 otherwise.
module alu(input logic        clk,
           input logic  [7:0] D_register,
           input logic  [3:0] I_register,
           input logic  [3:0] N_register,
           input logic        DF,
           input logic  [7:0] operand,
           output logic [7:0] result,
           output logic       carry,
           output logic       out_rdy
);
    // All arithmetic is done on explicitly zero-extended 9-bit values so
    // that bit 8 is the carry (additions) or borrow (subtractions) without
    // relying on implicit expression-width extension rules.
    logic [8:0] d_ext;
    logic [8:0] m_ext;
    logic [8:0] carry_in;
    logic [8:0] borrow_in;
    logic [8:0] tmp;

    always_comb begin
        out_rdy   = 1'b1;

        d_ext     = {1'b0, D_register};
        m_ext     = {1'b0, operand};
        carry_in  = {8'b0, DF};
        // DF == 0 means "a borrow is pending" for the with-borrow subtracts.
        borrow_in = {8'b0, ~DF};

        unique case ({I_register, N_register})
            // ADC(I): add with carry
            8'h74, 8'h7c:
                begin
                    tmp   = d_ext + m_ext + carry_in;
                    carry = tmp[8];
                end

            // SDB(I): operand - D - (NOT DF)
            8'h75, 8'h7d:
                begin
                    tmp   = m_ext - d_ext - borrow_in;
                    carry = ~tmp[8];
                end

            // SMB(I): D - operand - (NOT DF)
            8'h77, 8'h7f:
                begin
                    tmp   = d_ext - m_ext - borrow_in;
                    carry = ~tmp[8];
                end

            // OR(I): logical or, DF is left untouched
            8'hf1, 8'hf9:
                begin
                    tmp   = {1'b0, operand | D_register};
                    carry = DF;
                end

            // AND/ANI: logical and, DF is left untouched
            8'hf2, 8'hfa:
                begin
                    tmp   = {1'b0, operand & D_register};
                    carry = DF;
                end

            // XOR/XRI: exclusive or, DF is left untouched
            8'hf3, 8'hfb:
                begin
                    tmp   = {1'b0, operand ^ D_register};
                    carry = DF;
                end

            // ADD/ADI: add without carry
            8'hf4, 8'hfc:
                begin
                    tmp   = d_ext + m_ext;
                    carry = tmp[8];
                end

            // SD(I): operand - D
            8'hf5, 8'hfd:
                begin
                    tmp   = m_ext - d_ext;
                    carry = ~tmp[8];
                end

            // SM(I): D - operand
            8'hf7, 8'hff:
                begin
                    tmp   = d_ext - m_ext;
                    carry = ~tmp[8];
                end

            // No-op for all other instructions: pass D and DF through and
            // tell the consumer not to write anything back.
            default:
                begin
                    tmp     = d_ext;
                    carry   = DF;
                    out_rdy = 1'b0;
                end
        endcase

        result = tmp[7:0];
    end
endmodule



// CDP1802-style CPU core.
//
// Ports:
//   clk            - clock; all state advances on the rising edge.
//   reset          - synchronous reset (also triggered internally when
//                    `mode` is M_RESET).
//   bus_from_ram   - data read from memory.
//   bus_to_ram     - data to be written to memory.
//   ram_we         - memory write enable.
//   address_to_ram - 12-bit memory address.
//
// Every machine cycle (S_FETCH, S_EXECUTE, S_EXECUTE2) lasts three clocks,
// counted by `clock_counter` (0, 1, 2). Memory accesses requested by an
// instruction are described by `ram_task`: the address/write-enable/data are
// driven on clock 0 and, for reads, the returned byte is routed to its
// destination on clock 1.
//
// NOTE(review): this assumes `bus_from_ram` already reflects
// `address_to_ram` one clock after the address is driven - confirm against
// the latency of the attached memory.
module cpu(input logic        clk,
           input logic        reset,
           input logic [7:0]  bus_from_ram,
           output logic [7:0] bus_to_ram,
           output logic       ram_we,
           output logic [11:0] address_to_ram
);

    // Architectural registers.
    logic [15:0] R_registers[15:0];   // sixteen 16-bit scratchpad registers
    logic [3:0] N_register ;          // low nibble of the current opcode
    logic [3:0]P_register ;           // selects the program counter in R
    logic [3:0]X_register ;           // selects the data pointer in R
    logic [3:0]I_register ;           // high nibble of the current opcode
    logic [7:0]D_register ;           // accumulator
    logic [7:0]T_register ;           // holds (X,P), written by MARK
    logic B_registers[3:0];           // branch flag inputs; never driven in this module
    logic Q;
    logic DF; //carry bit
    logic IE;
    logic idle;

    logic [7:0] alu_operand;
    logic [7:0] alu_result;
    logic       alu_carry;
    logic       alu_out_rdy;

    // clk, D_register, I_register, N_register and DF are connected by name
    // through the wildcard.
    alu a (.operand(alu_operand), .result(alu_result), .carry(alu_carry), .out_rdy(alu_out_rdy), .*);

    enum logic [2:0] {S_FETCH, S_EXECUTE, S_EXECUTE2, S_DMA, S_INTERRUPT} state;

    // n.b: the CLEAR and WAIT signals are active low and need to be inverted
    // Also, Quartus won't synthesize this if it's an enum because we assign a non-enum packed array
    // to it later, so the constants are defined separately.
    logic [1:0] mode = 2'b0;

    localparam logic [1:0] M_RUN = 2'h3;
    localparam logic [1:0] M_PAUSE = 2'h2;
    localparam logic [1:0] M_RESET = 2'h1;
    localparam logic [1:0] M_LOAD = 2'h0;

    // Destination selectors for a memory read. Values 0-15 (bit 4 clear)
    // select the low byte of the corresponding R register; the values below
    // (bit 4 set) select special destinations.
    localparam logic [4:0] RAM_D_REG   = 5'b10000;
    localparam logic [4:0] RAM_T_REG   = 5'b10001;
    localparam logic [4:0] RAM_ALU_OP  = 5'b10010;
    localparam logic [4:0] RAM_XP_REG  = 5'b10011;

    logic [7:0] R_value;

    // Memory access requested by the instruction being executed.
    //   is_pending - an access is outstanding
    //   we         - 1 = write `value` to `addr`, 0 = read from `addr`
    //   R_reg_sel  - destination of a read (ignored for writes)
    //   addr       - memory address
    //   value      - byte to write (ignored for reads)
    struct packed {
        logic is_pending;
        logic we;
        logic [4:0] R_reg_sel;
        logic [11:0] addr;
        logic [7:0] value;
    } ram_task;

    logic should_branch;
    logic should_skip;

    // Most of the S_EXECUTE (S1) logic is here, it's just cleaner to
    // have it as a task rather than as a deeply nested pile of code.
    // It runs on clock 0 of S_EXECUTE and may set up `ram_task`.
    task execute;
        // 'unique' acts as a hint to the synthesizer. Some more reading here:
        // https://www.verilogpro.com/systemverilog-unique-priority/
        unique casez (I_register)
            4'h0:
                begin
                    // IDL (FIXME: CPU ignores idle signal)
                    if (!N_register) idle <= 1;
                    // LDN - load 'D' from the memory byte addressed by R(N).
                    // (Copying R(N)'s low byte into D is GLO, opcode 8N.)
                    else ram_task = '{1, 0, RAM_D_REG, R_registers[N_register], 0};
                end

            // INC/DEC R(N)
            4'h1: R_registers[N_register] <= R_registers[N_register] + 1;
            4'h2: R_registers[N_register] <= R_registers[N_register] - 1;

            // Branch instructions
            4'h3:
                begin
                    unique case (N_register)
                        4'h0: should_branch = 1'b1;
                        4'h1: should_branch = (Q == 1);
                        4'h2: should_branch = (!D_register);
                        4'h3: should_branch = (DF == 1);
                        4'h4: should_branch = (B_registers[0] == 1);
                        4'h5: should_branch = (B_registers[1] == 1);
                        4'h6: should_branch = (B_registers[2] == 1);
                        4'h7: should_branch = (B_registers[3] == 1);
                        4'h8: should_branch = 1'b0;
                        4'h9: should_branch = (Q == 0);
                        4'ha: should_branch = (D_register != 0);
                        4'hb: should_branch = (DF != 1);
                        4'hc: should_branch = (!B_registers[0]);
                        4'hd: should_branch = (!B_registers[1]);
                        4'he: should_branch = (!B_registers[2]);
                        4'hf: should_branch = (!B_registers[3]);
                    endcase

                    // We don't know what the address to jump to is yet.
                    // Read the byte referenced by the program counter
                    // into the low byte of the program counter.
                    ram_task = '{should_branch, 0, P_register, R_registers[P_register], 0};

                    // If not branching, move program counter forward a byte;
                    // the program counter gets advanced during FETCH to skip
                    // the jump entirely.
                    //
                    // If we ARE branching, the low byte will get overwritten
                    // when the ram_task is processed.
                    R_registers[P_register] <= R_registers[P_register] + 1;
                end

            // Load Advance (LDA)
            4'h4:
                begin
                    ram_task = '{1, 0, RAM_D_REG, R_registers[N_register], 0};
                    R_registers[N_register] <= R_registers[N_register] + 1;
                end

            // Store (STR)
            4'h5: ram_task = '{1, 1, N_register, R_registers[N_register], D_register};

            4'h6:
                begin
                    unique casez (N_register)
                        // IRX: increment R(X) by 1
                        4'h0: R_registers[X_register] <= R_registers[X_register] + 1;

                        // 61-67: write R(X) to bus and increment R(X)
                        // FIXME: we don't have a bus, performs a NOP instead
                        4'b0001,
                        4'b001?,
                        4'b01??:
                              D_register <= D_register;

                        // 69-6F: write bus value to R(X) and D registers
                        // (opcode 68 is undefined, but the case logic is nicer if we just
                        // make it another BUS->R(X) instruction)
                        // FIXME: we don't have a bus, performs a NOP instead
                        4'b1???:
                             D_register <= D_register;
                    endcase
                end

            4'h7,
            4'hf:
                begin
                    unique case ({I_register, N_register})
                        // RET: return and enable interrupts
                        // DIS: return and disable interrupts
                        8'h70,
                        8'h71:
                            begin
                                ram_task = '{1, 0, RAM_XP_REG, R_registers[X_register], 0};
                                IE <= ~(N_register[0]);
                                // The register that is advanced is the one X
                                // selects BEFORE (X, P) are reloaded from
                                // memory, so do it now while X_register still
                                // holds the old value. The read address has
                                // already been captured in ram_task.
                                R_registers[X_register] <= R_registers[X_register] + 1;
                            end

                        // LDXA: load via X and advance
                        8'h72:
                            begin
                                ram_task = '{1, 0, RAM_D_REG, R_registers[X_register], 0};
                                R_registers[X_register] <= R_registers[X_register] + 1;
                            end

                        // STXD: store via X and decrement
                        8'h73:
                            begin
                                ram_task = '{1, 1, X_register, R_registers[X_register], D_register};
                                R_registers[X_register] <=  R_registers[X_register] - 1;
                            end

                        // SAV: store T into the memory byte addressed by R(X)
                        8'h78: ram_task = '{1, 1, RAM_T_REG, R_registers[X_register], T_register};

                        // MARK: save (X,P) in T and on the stack at R(2),
                        // then copy P into X and decrement R(2)
                        8'h79:
                            begin
                                T_register <= {X_register, P_register};
                                ram_task = '{1, 1, 0, R_registers[2], {X_register, P_register}};
                                X_register <= P_register;
                                R_registers[2] <= R_registers[2] - 1;
                            end

                        // REQ: Q = 0
                        8'h7a: Q <= 0;

                        // SEQ: Q = 1
                        8'h7b: Q <= 1;

                        // LDX: load via X
                        8'hf0:   ram_task = '{1, 0, RAM_D_REG, R_registers[X_register], 0};

                        // LDI: Load immediate
                        8'hf8:
                            begin
                                ram_task = '{1, 0, RAM_D_REG, R_registers[P_register], 0};
                                R_registers[P_register] <= R_registers[P_register] + 1;
                            end

                        // ---------------- Logic/Arithmetic operations ----------------

                        // A lot of arithmetic operations need to read from memory before
                        // calculating the result. Instructions that need to read via the
                        // same register are all executed with the same ram_task; the
                        // actual arithmetic is done on the third clock cycle of the
                        // S_EXECUTE state.
                        //
                        // The actual logic for arithmetic and logical operations isn't
                        // here. For that, check the `alu` module.
                        //
                        // Read via R(X):
                        // 0x74 | ADC | add with carry
                        // 0x75 | SDB | subtract with borrow
                        // 0x77 | SMB | subtract memory with borrow
                        // 0xf1 | OR  | logical or
                        // 0xf2 | AND | logical and
                        // 0xf3 | XOR | exclusive or
                        // 0xf4 | ADD | add without carry
                        // 0xf5 | SD  | subtract without borrow
                        // 0xf7 | SM  | subtract memory without borrow
                        8'h74,
                        8'h75,
                        8'h77,
                        8'hf1,
                        8'hf2,
                        8'hf3,
                        8'hf4,
                        8'hf5,
                        8'hf7:
                            ram_task = '{1, 0, RAM_ALU_OP, R_registers[X_register], 0};

                        // Read via R(P):
                        // 0x7c | ADCI | add immediate with carry
                        // 0x7d | SDBI | subtract immediate with borrow
                        // 0x7f | SMBI | subtract memory immediate with borrow
                        // 0xf9 | ORI  | logical or w/ immediate
                        // 0xfa | ANI  | logical and w/ immediate
                        // 0xfb | XRI  | exclusive or w/ immediate
                        // 0xfc | ADI  | add immediate without carry
                        // 0xfd | SDI  | subtract immediate without borrow
                        // 0xff | SMI  | subtract memory immediate without borrow
                        8'h7c,
                        8'h7d,
                        8'h7f,
                        8'hf9,
                        8'hfa,
                        8'hfb,
                        8'hfc,
                        8'hfd,
                        8'hff:
                            begin
                                ram_task = '{1, 0, RAM_ALU_OP, R_registers[P_register], 0};
                                R_registers[P_register] <= R_registers[P_register] + 1;
                            end

                        // SHRC: shift right through carry (0x76) - old DF enters bit 7
                        // SHR: shift right, zero enters bit 7 (0xf6)
                        // I_register[3] distinguishes the two (0 for 0x76, 1 for 0xf6).
                        8'h76,
                        8'hf6:
                            begin
                                DF <= D_register[0];
                                D_register <= {DF & ~I_register[3], D_register[7:1]};
                            end

                        // SHLC: shift left through carry (0x7e) - old DF enters bit 0
                        // SHL: shift left, zero enters bit 0 (0xfe)
                        8'h7e,
                        8'hfe:
                            begin
                                DF <= D_register[7];
                                D_register <= {D_register[6:0], DF & ~I_register[3]};
                            end
                    endcase
                end

            // GET LOW
            4'h8:   D_register <= R_registers[N_register][7:0];
            // GET HIGH
            4'h9:   D_register <= R_registers[N_register][15:8];
            // PUT LOW
            4'ha:   R_registers[N_register][7:0] <= D_register;
            // PUT HIGH
            4'hb:   R_registers[N_register][15:8] <= D_register;

            // SEP
            4'hd:   P_register <= N_register;
            // SEX
            4'he:   X_register <= N_register;

            // ---------------- Long branches ----------------
            4'hc:
                begin
                    unique case (N_register[2:0])
                        3'h0: should_branch = 1;
                        3'h1: should_branch = Q;
                        3'h2: should_branch = (!D_register);
                        3'h3: should_branch = DF;
                        3'h4: should_branch = ~IE;
                        3'h5: should_branch = ~Q;
                        3'h6: should_branch = (D_register != 8'h0);
                        3'h7: should_branch = ~DF;
                    endcase

                    // C8-CF are the same operations as C0-C7 with inverted conditions,
                    // with the exception of C4, which is a no-op.
                    should_branch = (should_branch ^ N_register[3]) & (N_register != 4'h4);

                    // Are these skip instructions?
                    if (N_register[2])
                    begin
                        should_skip = should_branch;
                        should_branch = 0;
                    end
                    else
                    begin
                        should_skip = ~should_branch;
                    end

                    // Kind of cheating by using the ALU operand as a scratch register:
                    // the byte at R(P)+1 (low byte of the target) is parked there
                    // until S_EXECUTE2 fetches the high byte from R(P).
                    ram_task = '{should_branch, 0, RAM_ALU_OP, R_registers[P_register] + 1, 0};
                    R_registers[P_register] <= R_registers[P_register] + (should_skip ? 2 : 0);
                end
        endcase
    endtask

    // Position within the current three-clock machine cycle.
    logic [1:0] clock_counter;

    always_ff @(posedge clk)
    begin
        //mode = M_RUN;
        // In LOAD mode, poll memory address 0x10 and start the CPU (via
        // M_RESET) once it reads back as 1.
        if (mode == M_LOAD)
        begin
            address_to_ram = 12'h10;
            if (bus_from_ram == 8'b1) 
                mode <= M_RESET; 
        end
        if (reset || mode == M_RESET)
        begin
            I_register <= 0;
            N_register <= 0;
            P_register <= 0;
            Q <= 0;
            IE <= 1;
            bus_to_ram = 0;
            ram_we = 0;
            address_to_ram = 12'b0;
            state <= S_FETCH;

            clock_counter <= 0;

            // Drop any memory access left over from before the reset (or an
            // unknown power-up value); otherwise the first instruction that
            // does not set up its own ram_task could replay it.
            ram_task.is_pending = 0;

            // FIXME: Datasheet p. 3-21 says that registers X, P, and R(0)
            // are initialized on the first machine cycle _after_ RESET is
            // terminated; do we need to adhere strictly to that or is it
            // fine to do this all in one go?
            X_register <= 0;
            P_register <= 0;
            R_registers[0] <= 0;
            mode <= M_RUN;
        end
        else if (mode == M_RUN)
        begin
            unique case (state)
                S_FETCH:
                    begin
                        if (clock_counter == 0)
                        begin
                            ram_we = 0;
                            address_to_ram = R_registers[P_register][11:0];
                        end
                        // Stall for one cycle to match the EXECUTE machine cycle
                        else if (clock_counter == 2)
                        begin
                            I_register <= bus_from_ram[7:4];
                            N_register <= bus_from_ram[3:0];
                            R_registers[P_register] <= R_registers[P_register] + 1;
                            state <= S_EXECUTE;
                        end
                    end

                S_EXECUTE:
                    begin
                        if (clock_counter == 0)
                        begin
                            execute();

                            if (ram_task.is_pending)
                            begin
                                address_to_ram = ram_task.addr;
                                ram_we = ram_task.we;
                                bus_to_ram = ram_task.value;
                            end
                        end
                        else if (clock_counter == 1)
                        begin
                            // Only reads deliver a byte to a destination.
                            // Without the write-enable check, a store (STR,
                            // STXD, SAV, MARK) would fall into this case and
                            // clobber the low byte of a scratchpad register
                            // with whatever is on the bus.
                            if (ram_task.is_pending && !ram_task.we)
                            begin
                                unique case (ram_task.R_reg_sel)
                                    RAM_D_REG:  D_register <= bus_from_ram;
                                    RAM_T_REG:  T_register <= bus_from_ram;
                                    RAM_ALU_OP: alu_operand <= bus_from_ram;
                                    RAM_XP_REG: {X_register, P_register} <= bus_from_ram;
                                    default:    R_registers[ram_task.R_reg_sel[3:0]] <= {R_registers[ram_task.R_reg_sel[3:0]][15:8], bus_from_ram};
                                endcase
                            end

                            ram_task.is_pending = 0;
                            ram_we = 0;
                        end
                        else
                        begin
                            // Await the results of any arithmetic operations.
                            // If we have no ALU results, discard the output.
                            if (alu_out_rdy)
                            begin
                                D_register <= alu_result;
                                DF <= alu_carry;
                            end

                            // User manual says everything, including the NOP,
                            // goes to the EXEC2 state.
                            if (I_register == 4'hc) state <= S_EXECUTE2;
                            else state <= S_FETCH;
                        end
                    end

                // Second execute cycle of the long-branch group: fetch the
                // high byte of the target from R(P) and, if branching,
                // combine it with the low byte parked in alu_operand.
                S_EXECUTE2:
                    begin
                        if (clock_counter == 0)
                        begin
                            address_to_ram = R_registers[P_register][11:0];
                        end
                        else if (clock_counter == 1)
                        begin
                            if (should_branch) R_registers[P_register] <= {bus_from_ram, alu_operand};
                        end
                        else state <= S_FETCH;
                    end

                // TODO: DMA
                S_DMA:
                    begin
                    end

                // TODO: interrupt
                S_INTERRUPT:
                    begin
                    end
            endcase

            if (clock_counter < 2) clock_counter <= clock_counter + 1;
            else clock_counter <= 0;
        end
    end
endmodule
