// newton.c
#include <stddef.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdint.h>
#include "circuit.h"
#include "gaussian.h"

/* Static stamps: filled by stamp_static() and copied into G/Ivec (and
 * G_lin/I_lin) by clear_system_sta() at the start of every update_all(). */
float G_sta[MAT_SIZE][MAT_SIZE];
float I_sta[MAT_SIZE];
/* Snapshot of G/Ivec taken in update_all() after the LIN_T stamps; restored
 * into G/Ivec at the top of every Newton-Raphson iteration. */
float G_lin[MAT_SIZE][MAT_SIZE];
float I_lin[MAT_SIZE];

/*
 * Open /dev/mem read/write with O_SYNC and return its file descriptor.
 *
 * Never returns an error: if open() fails, the reason is reported through
 * perror() and the process terminates with exit status 1.
 */
int open_fpga(void) {
    const int mem_fd = open("/dev/mem", O_RDWR | O_SYNC);

    if (mem_fd != -1) {
        return mem_fd;
    }

    perror("open");
    exit(1);
}

/*
 * Map FPGA_AXI_SPAN bytes of `fd`, starting at file offset FPGA_AXI_BASE,
 * as a shared readable/writable mapping and return its base address.
 *
 * Never returns MAP_FAILED: if mmap() fails, the reason is reported through
 * perror() and the process terminates with exit status 1.
 */
void* mmap_memory(int fd) {
    void* const mapping = mmap(NULL, FPGA_AXI_SPAN, PROT_READ | PROT_WRITE,
                               MAP_SHARED, fd, FPGA_AXI_BASE);

    if (mapping != MAP_FAILED) {
        return mapping;
    }

    perror("mmap");
    exit(1);
}

/*
 * Copy floats one element at a time from `source` to `dest`.
 *
 * Note the argument order: source first, destination second (the opposite
 * of memcpy()). `size` is a byte count; only whole floats are copied, so any
 * trailing bytes that do not make up a full float are ignored. Elements are
 * transferred in ascending index order through volatile accesses.
 *
 * Always returns 0.
 */
int my_memcpy(volatile float* source, volatile float* dest, size_t size) {
	const size_t count = size / sizeof(float);
	size_t idx = 0;

	while (idx < count) {
		dest[idx] = source[idx];
		idx++;
	}

	return 0;
}

// Main loop for time-domain and nonlinear simulation (helpers first, then update_all)

/* Reset the global system to zero: every entry of G and every entry of Ivec. */
void clear_system(void) {
    for (int row = 0; row < MAT_SIZE; row++) {
        for (int col = 0; col < MAT_SIZE; col++) {
            G[row][col] = 0.0f;
        }
        Ivec[row] = 0.0f;
    }
}

/*
 * Reset the working system to the static stamps only: G and G_lin are
 * overwritten with G_sta, Ivec and I_lin with I_sta. Anything stamped on
 * top of the static contribution is thereby discarded.
 */
void clear_system_sta(void) {
    /* matrices */
    memcpy(G,     G_sta, sizeof(G_sta));
    memcpy(G_lin, G_sta, sizeof(G_sta));

    /* right-hand-side vectors */
    memcpy(Ivec,  I_sta, sizeof(I_sta));
    memcpy(I_lin, I_sta, sizeof(I_sta));
}

/*
 * Stamp every STA_T component that provides a stamp_lin hook into the
 * static system (G_sta, I_sta). Components of other types, or without a
 * stamp_lin hook, are skipped.
 */
void stamp_static(void) {
    for (int idx = 0; idx < ncomps; idx++) {
        if (comps[idx].type != STA_T) {
            continue;
        }
        if (!comps[idx].stamp_lin) {
            continue;
        }
        comps[idx].stamp_lin(&comps[idx], G_sta, I_sta);
    }
}

/*
 * Main loop: linear and nonlinear.
 *
 * Builds the system for one call by starting from the static stamps, adding
 * the LIN_T stamps, and then running up to MAX_NR_ITER Newton-Raphson
 * iterations over the NL_T components. Each iteration copies G and Ivec into
 * the memory-mapped FPGA window, calls solve_system(), and copies v back out
 * of the window. Iteration stops early once the largest per-node change in v
 * falls below NR_TOL. Afterwards every LIN_T component that has an update
 * hook is updated.
 *
 * FPGA window layout (in floats, from the mapping base): G, immediately
 * followed by Ivec, immediately followed by v.
 *
 * t is currently unused.
 *
 * Failure handling:
 *  - open_fpga() / mmap_memory() terminate the process on failure.
 *  - If /dev/gaussian cannot be opened, a message is printed, the FPGA
 *    mapping and /dev/mem descriptor are released, and the function returns
 *    without solving or running the final update.
 *  - A failing munmap() terminates the process with exit status 1.
 *  - A non-zero solve_system() result is only reported; the iteration
 *    continues. NOTE(review): v read back after a failed solve may be
 *    meaningless -- confirm whether the loop should stop instead.
 */
void update_all(float t) {
    (void)t;

    // 0. retrieve static component matrix
    clear_system_sta();

    // 1. stamp all linear components
    for (int i = 0; i < ncomps; i++) {
        if (comps[i].type == LIN_T && comps[i].stamp_lin) {
            comps[i].stamp_lin(&comps[i], G, Ivec);
        }
    }

    // 2. Snapshot G, I with only linear components, and previous v
    float prev_v[MAT_SIZE];
    memcpy(G_lin, G, sizeof G);
    memcpy(I_lin, Ivec, sizeof Ivec);
    memcpy(v_prev, v, sizeof v);
    /* prev_v is what the convergence check compares against; it must start
     * from the current v, otherwise the first iteration reads an
     * uninitialized array. */
    memcpy(prev_v, v, sizeof prev_v);

    int fd = open_fpga();
    void* virtual_base = mmap_memory(fd);
    volatile float* fpga_base = (volatile float*)virtual_base;

    /* Regions of the FPGA window; the offsets do not change per iteration. */
    volatile float* fpga_G = fpga_base;
    volatile float* fpga_I = fpga_G + sizeof(G) / sizeof(float);
    volatile float* fpga_v = fpga_I + sizeof(Ivec) / sizeof(float);

    static const char filename[] = "/dev/gaussian";

    printf("Gaussian Userspace program started\n");

    int gaussian_fd = open(filename, O_RDWR);
    if (gaussian_fd == -1) {
        fprintf(stderr, "could not open %s\n", filename);
        /* Do not leak the mapping and /dev/mem descriptor on this path. */
        if (munmap(virtual_base, FPGA_AXI_SPAN) == -1) {
            perror("munmap");
        }
        close(fd);
        return;
    }

    // 3. Newton-Raphson loop for all nonlinear components
    for (int iter = 0; iter < MAX_NR_ITER; iter++) {

        /* Pulse the reset field: first with r = 1, then with r = 0. */
        gaussian_in_t in = {
            .n = (uint8_t)0,
            .g = 0,    // GO
            .r = 1     // RESET
        };
        set_in(&in, gaussian_fd);

        print_in(gaussian_fd);

        in.r = 0;
        set_in(&in, gaussian_fd);

        print_in(gaussian_fd);

        // 3.1 restore the linear stamps
        memcpy(G, G_lin, sizeof G);
        memcpy(Ivec, I_lin, sizeof Ivec);

        // 3.2 stamp all nonlinear components
        for (int i = 0; i < ncomps; i++) {
            if (comps[i].type == NL_T && comps[i].stamp_nl) {
                comps[i].stamp_nl(&comps[i], G, Ivec);
            }
        }

        // 3.3 solve the system
        if (PRINT_MAT) {
            print_matrix(G, Ivec, nnodes);
        }

        /* solve_system() solves in v, for Gv = I
        G, v, Ivec are all declared globally*/
        // 3.3.1 write G to FPGA
        my_memcpy((volatile float*)G, fpga_G, sizeof(G));

        printf("G: %f\n", G[0][0]);

        // 3.3.2 write Ivec to FPGA
        my_memcpy(Ivec, fpga_I, sizeof(Ivec));

        int ret = solve_system(nnodes, gaussian_fd);
        if (ret != 0) {
            fprintf(stderr, "Error: solve_system failed with code %d\n", ret);
        }

        // 3.3.3 read v from FPGA
        my_memcpy(fpga_v, v, sizeof(v));

        if (PRINT_MAT) {
            print_vector(v, nnodes);
        }

        // 3.4 compute dv, check for convergence
        float maxdv = 0.0f;
        for (int n = 0; n < nnodes; n++) {
            float dv = fabsf(v[n] - prev_v[n]);
            if (dv > maxdv) {
                maxdv = dv;
            }
        }
        if (PRINT_MAT) {
            printf("iter %d: max_res=%.6g\n", iter, maxdv);
        }

        if (maxdv < NR_TOL) {   // converged!
            break;
        }

        // 3.5 Update every device's internal operating point
        for (int i = 0; i < ncomps; i++) {
            if (comps[i].type == NL_T && comps[i].update) {
                comps[i].update(&comps[i]); // update diode only
            }
        }

        // 3.6 prepare for next iteration
        memcpy(prev_v, v, sizeof prev_v);
    }

    if (munmap(virtual_base, FPGA_AXI_SPAN) == -1) {
        perror("munmap");
        close(fd);
        close(gaussian_fd);
        exit(1);
    }

    close(fd);

    /* gaussian_fd is known to be valid here (0 is a legal descriptor). */
    close(gaussian_fd);

    // 4) final update so device states match the converged voltages
    for (int i = 0; i < ncomps; i++) {
        if (comps[i].type == LIN_T && comps[i].update) {
            comps[i].update(&comps[i]);
        }
    }
}

/*
 * Print one line per entry of comps[]: its index followed by the name of
 * its type (LIN_T, STA_T or NL_T), or "???" for any other type value.
 */
void print_components(void) {
    int idx = 0;

    while (idx < ncomps) {
        printf("Component %d: ", idx);

        if (comps[idx].type == LIN_T) {
            printf("LIN_T ");
        } else if (comps[idx].type == STA_T) {
            printf("STA_T ");
        } else if (comps[idx].type == NL_T) {
            printf("NL_T ");
        } else {
            printf("??? ");
        }

        printf("\n");
        idx++;
    }
}
