// main.c
// software tester code for Gaussian elimination
// + random matrix generation + compare

#include "memory.h"
#include "fsm.h"
#include "debug.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Byte-addressed memory image, defined in another translation unit.
 * main() allocates MAT_SIZE * (MAT_SIZE + 2) floats for it; the matrix G
 * starts at offset 0 and the right-hand side I starts at float offset
 * MAT_SIZE * MAT_SIZE.
 * Original layout note: "128 by 128 for G, then stores 128 I and 128 v"
 * (presumably MAT_SIZE == 128 and the solution v follows I -- confirm
 * against memory.h). */
extern char *memory;

/* Software reference routines used to validate the hardware model. */

/* Returns the row in [k, n) whose column-k entry has the largest magnitude;
 * terminates the process if that magnitude is below 1e-12. */
int ref_pivot(int n, int k, float G[][MAT_SIZE]);
/* Swaps rows k and p of M (columns k..n-1 only) and entries k and p of I. */
void ref_swap(int n, int k, int p, float M[][MAT_SIZE], float I[MAT_SIZE]);
/* Eliminates column k from the rows below row k, updating G and I in place. */
void ref_inner(int k, float G[][MAT_SIZE], float I[MAT_SIZE], int n) ;
/* Back-substitutes the upper-triangular system G * v_ref = I. */
void ref_backsub(float G[][MAT_SIZE], float I[MAT_SIZE], float v_ref[MAT_SIZE], int n);

/* Test-data generators: both write the matrix and right-hand side into
 * `memory` and then call print_matrix_mem(). */
void random_mat_gen(int n);
void fixed_mat_gen();

/*
 * Runs the hardware Gaussian-elimination model until it reports done,
 * writing one CSV row per tick to gaussian_flat_trace.csv.
 *
 *   n - order of the linear system handed to the model
 *
 * Returns 1 when the model reports success and 0 otherwise (including when
 * the trace file cannot be opened). The trace file is closed on every path.
 */
static int run_hw_model(int n) {
    // fp unit dummy models
    fp_unit_t fp_sub = {.type = 1, .latency = 1};
    fp_unit_t fp_mul = {.type = 2, .latency = 1};
    fp_unit_t fp_div = {.type = 3, .latency = 1};

    FILE *csv = fopen("gaussian_flat_trace.csv", "w");
    if (csv == NULL) {
        fprintf(stderr, "Cannot open gaussian_flat_trace.csv for writing\n");
        return 0;
    }
    gaussian_flat_print_header(csv);

    gaussian_flat_t gf;
    gaussian_flat_init(&gf, 1, n, &fp_sub, &fp_mul, &fp_div);
    while (!gf.done) {
        gaussian_flat_tick(&gf);
        gaussian_flat_print_csv(csv, &gf);
    }
    fclose(csv);

    if (!gf.success) {
        printf("Gaussian elimination failed!\n");
        return 0;
    }
    printf("Gassian elimination successfully run\n");
    if (gf.singular) {
        printf("!! Singular matrix\n");
    }
    return 1;
}

/*
 * Test driver: loads the fixed 4x4 system into the simulated memory, runs the
 * hardware model on it, solves the same system with the software reference
 * routines, and compares the two solutions component by component.
 *
 * Returns EXIT_SUCCESS when every component agrees within EPSILON, and
 * EXIT_FAILURE on a mismatch, when the hardware model does not succeed, or
 * when the memory image / trace file cannot be set up. Note that ref_pivot()
 * terminates the process itself if it meets a singular pivot column.
 */
int main(void) {
    int n = 4;

    srand((unsigned)time(NULL));

    // calloc: cells the generator does not write start as deterministic zeros
    memory = calloc((size_t)MAT_SIZE * (MAT_SIZE + 2), sizeof(float));
    if (memory == NULL) {
        fprintf(stderr, "Cannot allocate simulator memory\n");
        return EXIT_FAILURE;
    }
    // fixed_mat_gen is defined without parameters, so it is called without any
    fixed_mat_gen();

    // Snapshot the inputs for the software reference BEFORE the hardware model
    // runs. The generators in this file store row i of the n x n matrix packed
    // at float offset i * n, so unpack row by row into the MAT_SIZE-wide rows
    // of G; one block copy would read the data with the wrong stride.
    // NOTE(review): assumes the simulator indexes G with stride n as well --
    // confirm against memory.h.
    float G[MAT_SIZE][MAT_SIZE] = {{0}};
    float I[MAT_SIZE] = {0};
    float v_ref[MAT_SIZE] = {0};
    for (int i = 0; i < n; i++) {
        memcpy(G[i], memory + (size_t)i * n * sizeof(float), (size_t)n * sizeof(float));
    }
    memcpy(I, memory + (size_t)MAT_SIZE * MAT_SIZE * sizeof(float), sizeof I);

    // run hardware simulator
    printf("-----Starting HW test-----\n");
    if (!run_hw_model(n)) {
        free(memory);
        memory = NULL;
        return EXIT_FAILURE;
    }

    // software check: forward elimination with partial pivoting, then back-sub
    for (int k = 0; k < n; k++) {
        int sw = ref_pivot(n, k, G);
        ref_swap(n, k, sw, G, I);
        ref_inner(k, G, I, n);
    }
    ref_backsub(G, I, v_ref, n);

    // compare results
    int all_ok = 1;
    printf("----- Hardware solution: -----\n");
    for (int i = 0; i < n; i++) {
        float hw = read_float(get_v(i));
        printf("%.3f ", hw);
        float diff = fabsf(v_ref[i] - hw);
        if (diff > EPSILON) {
            printf("Mismatch at i=%d: ref=%.6f hw=%.6f (delta=%.6f)\n", i, v_ref[i], hw, diff);
            all_ok = 0;
        }
    }
    printf("\nCase's printlines \n");

    for (int i = 0; i < n; i++) {
        printf("%.3f \n", read_float(get_v(i)));
    }

    // dump the eliminated reference matrix next to the reference solution and
    // the transformed right-hand side; tiny entries are shown as 0
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            printf("%8.4g ", (fabsf(G[i][j]) < 0.0001 ? 0 : G[i][j]));
        }
        printf(" | %8.4g", v_ref[i]);
        printf(" | %8.4g\n", I[i]);
    }
    printf("\n");

    if (all_ok) {
        printf("\nPassed on random matrix test.\n");
    } else {
        printf("\nFAILED on random matrix test!\n");
    }

    free(memory);
    memory = NULL;
    return all_ok ? EXIT_SUCCESS : EXIT_FAILURE;
}

/*
 * Software reference for the hardware pivot finder.
 *
 * Scans column k from row k through row n-1 and selects the row whose entry
 * has the largest absolute value; on ties the earliest row is kept.
 *
 * Returns the selected row index. If the largest magnitude found is below
 * 1e-12, a "Singular matrix" message is written to stderr and the process
 * exits with status 1.
 */
int ref_pivot(int n, int k, float G[][MAT_SIZE]) {
    int best_row = k;
    float best_mag = fabsf(G[k][k]);

    for (int row = k + 1; row < n; row++) {
        float mag = fabsf(G[row][k]);
        if (mag > best_mag) {
            best_mag = mag;
            best_row = row;
        }
    }

    if (best_mag < 1e-12f) {
        fprintf(stderr, "Singular matrix at row %d\n", k);
        exit(1);
    }
    return best_row;
}

/*
 * Software reference row swap.
 *
 * Exchanges rows k and p of M, touching only columns k..n-1, and exchanges
 * the matching entries k and p of the right-hand side I. Columns left of k
 * are not modified.
 */
void ref_swap(int n, int k, int p, float M[][MAT_SIZE], float I[MAT_SIZE]) {
    float *row_k = M[k];
    float *row_p = M[p];

    // right-hand side first; it is independent of the matrix columns
    float rhs_k = I[k];
    I[k] = I[p];
    I[p] = rhs_k;

    int col = k;
    while (col < n) {
        float held = row_k[col];
        row_k[col] = row_p[col];
        row_p[col] = held;
        col++;
    }
}


/*
 * Software reference for one elimination step.
 *
 * Uses row k as the pivot row and, for every row i below it, subtracts
 * (G[i][k] / G[k][k]) times row k from row i, applying the same operation to
 * the right-hand side.
 *
 *   k - index of the pivot row/column; the division by G[k][k] is not guarded
 *   G - coefficient matrix, updated in place (columns k..n-1 of rows k+1..n-1)
 *   I - right-hand-side vector, updated in place (entries k+1..n-1)
 *   n - order of the system
 */
void ref_inner(int k, float G[][MAT_SIZE], float I[MAT_SIZE], int n) {
    for (int i = k + 1; i < n; i++) {
        float m = G[i][k] / G[k][k];
        // The right-hand side lives in I, not in a column n of G, so the
        // column loop must stop at n - 1. Touching column n would run past
        // the end of the row (and of the array) when n == MAT_SIZE.
        for (int j = k; j < n; j++) {
            G[i][j] -= m * G[k][j];
        }
        I[i] -= m * I[k];
    }
}

/*
 * Software reference back-substitution.
 *
 * Solves the upper-triangular system G * v_ref = I for the leading n x n part
 * of G, working from the last row up to the first. Entries of G below the
 * diagonal are never read.
 *
 *   G     - coefficient matrix (read only here)
 *   I     - right-hand-side vector (read only here)
 *   v_ref - receives the solution in entries 0..n-1
 *   n     - order of the system
 */
void ref_backsub(float G[][MAT_SIZE], float I[MAT_SIZE], float v_ref[MAT_SIZE], int n) {
    for (int row = n; row-- > 0; ) {
        const float *coeff = G[row];
        float acc = I[row];  // start from the RHS, then peel off known terms
        for (int col = row + 1; col < n; col++) {
            acc -= coeff[col] * v_ref[col];
        }
        v_ref[row] = acc / coeff[row];
    }
}

/*
 * Fills the simulated memory with a random n x n system and prints it.
 *
 * Every value is drawn with rand() and scaled into [-100, 100]. The matrix is
 * written first, row-major and packed with a row stride of n floats starting
 * at offset 0; the n right-hand-side values follow at float offset
 * MAT_SIZE * MAT_SIZE. Finishes by calling print_matrix_mem(n).
 */
void random_mat_gen(int n) {
    // 1) matrix entries, packed back to back from the start of memory
    char *cursor = memory;
    for (int row = 0; row < n; row++) {
        for (int col = 0; col < n; col++) {
            float sample = ((float)rand() / RAND_MAX) * 200.0f - 100.0f;
            memcpy(cursor, &sample, sizeof sample);
            cursor += sizeof sample;
        }
    }

    // 2) fill I (RHS) with random floats in [-100,100]
    char *rhs = memory + (size_t)MAT_SIZE * MAT_SIZE * sizeof(float);
    for (int row = 0; row < n; row++) {
        float sample = ((float)rand() / RAND_MAX) * 200.0f - 100.0f;
        memcpy(rhs + (size_t)row * sizeof sample, &sample, sizeof sample);
    }

    print_matrix_mem(n);
}

/*
 * Loads a hard-coded 4x4 linear system into the simulated memory and prints
 * it.
 *
 * The matrix is stored row-major and packed (row stride of 4 floats) starting
 * at offset 0; the four right-hand-side values are stored at float offset
 * MAT_SIZE * MAT_SIZE. Finishes by calling print_matrix_mem(4).
 */
void fixed_mat_gen() {
    enum { DIM = 4 };

    /* 4x4 test matrix */
    static const float A[DIM][DIM] = {
        { -12.54f,  56.58f,   1.138f,  -70.59f },
        { -10.31f,  45.32f, -43.700f,   60.69f },
        {  82.89f, -58.82f,  16.250f,   61.34f },
        {  23.99f, -19.60f,  37.840f,   22.39f }
    };
    /* RHS vector */
    static const float I[DIM] = { -72.77f, 80.96f, 13.96f, -95.36f };

    /* one copy per row: row r lands at float offset r * DIM */
    for (int row = 0; row < DIM; row++) {
        memcpy(memory + (size_t)row * DIM * sizeof(float), A[row], sizeof A[row]);
    }

    /* the RHS entries are contiguous, so a single copy places all of them */
    memcpy(memory + (size_t)MAT_SIZE * MAT_SIZE * sizeof(float), I, sizeof I);

    print_matrix_mem(DIM);
}

