/*
 * Userspace program that communicates with the DPU device driver
 * (/dev/dpu), primarily through FIFO read/write ioctls
 *
 * Stephen A. Edwards
 * Columbia University
 */

#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>

#include "./fifo.h"

/* Sizing constants for the test pipeline driven by main(). */
#define BUFFERS  10     /* scratch buffers; main() indexes data[0] through data[9] */
#define NTHREADS 8      /* size of main()'s threads[]/jobs[] arrays (the JOIN stage uses all 8) */
#define BUFFER_SIZE 20  /* ints per scratch buffer */

/* Direction of a job: worker() takes its read path for READ and its write path otherwise. */
typedef enum {
	READ,
	WRITE
} transfer_type;

/* File descriptor of the device node: main() stores the result of open() here
   and worker() passes it to every ioctl(). */
int dpu_fd;

/*
 * One FIFO transfer, handed to worker() as its thread argument.
 * Keep the field order: main() fills jobs with positional compound literals.
 */
typedef struct {
	unsigned int fifo_id;   /* FIFO id forwarded to the driver in each request */
	transfer_type t;        /* READ or WRITE */
	/*
	 * WRITE: data has been allocated and holds `length` elements to transfer.
	 * READ:  data is a destination buffer of `length` elements (the caller
	 *        must make sure it is large enough for the incoming data);
	 *        worker() overwrites `length` with the number of elements read.
	 */
	int *data;
	int length;
} job;

/*
 * Fixed input columns that main() writes to the device:
 *   in0, in1 - the two 8-element columns sent in the BOOLGEN stage
 *              (in0 is sent again in COLFILTER1)
 *   in2      - 8-element character column sent in COLFILTER2
 *   in3, in4 - the two 10-element columns sent in the JOIN stage
 */
int in0[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
int in1[] = { 1, 3, 3, 3, 5, 6, 7, 8 };
int in2[] = { 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h' };
int in3[] = { 1, 1, 2, 3, 3, 6, 7, 7, 8, 10 };
int in4[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };


/*
 * Thread entry point: carry out the single FIFO transfer described by a job.
 *
 * arg points to a job.
 *  - READ:  issue FIFO_READ_DATA ioctls on dpu_fd, storing each call's data
 *           at the current end of j->data, until the request comes back with
 *           op.done set. The element total is then written to j->length.
 *  - WRITE: issue FIFO_WRITE_DATA ioctls in chunks of at most FIFO_SIZE
 *           elements until the counts reported back in op.length add up to
 *           j->length; the chunk that reaches the end of the data is sent
 *           with op.done = 1.
 * Whenever a call reports zero elements the thread yields the CPU and retries.
 *
 * Returns NULL on success and (void *) -1 if an ioctl fails (after perror()).
 */
void * worker (void* arg){

	job * j = (job*) arg;
	opcode op;
	int length = 0;   /* elements transferred so far */

	if (j->t == READ){
		printf("Read thread started\n");

		do{
			/*
			 * NOTE(review): every request is built with FIFO_SIZE regardless
			 * of how much room is left behind data[length]; the capacity the
			 * caller stated in j->length is not checked here - confirm the
			 * buffers handed to READ jobs are large enough.
			 */
			op = (opcode) {FIFO_SIZE, 0, j->fifo_id, &(j->data[length])};

			/* tell the driver to copy stuff */
			if (ioctl(dpu_fd, FIFO_READ_DATA, &op)) {
				perror("ioctl(FIFO_READ_DATA) failed");
				return (void*) -1;
			}

#ifdef DEBUG
			for (int i = length ; i < length + op.length ; i++){
				printf("Data: %d\n",j->data[i]);
			}
#endif

			/* advance the write position in the read buffer */
			length += op.length;

			/* nothing arrived: let the other threads run before retrying */
			if (op.length == 0){
				sched_yield();
			}

		}while(!op.done); /* until the done signal is reported back by ioctl */

		j->length = length; /* write back how many elements were actually read */

		printf("READER is done!\n");
	}else{            /* write task */
		int to_write;

		printf("Writer[%d] started\n",j->fifo_id);

		while (length < j->length){

			/* never hand the driver more than FIFO_SIZE elements at once */
			to_write = MIN( FIFO_SIZE  , j->length - length );
			op = (opcode) {to_write, 0, j->fifo_id, &(j->data[length])};

			/* this chunk reaches the end of the stream: flag it as the last one */
			if( length + to_write == j->length ){
				printf("WRITER[%d] is sending done!\n",j->fifo_id);
				op.done = 1;
			}

#ifdef DEBUG
			printf("Writer[%d] - I am going to ship %d element\n",j->fifo_id,to_write);
#endif
			/* tell the driver to copy stuff */
			if (ioctl(dpu_fd, FIFO_WRITE_DATA, &op)) {
				perror("ioctl(FIFO_WRITE_DATA) failed");
				return (void*) -1;
			}

			/* advance by the count reported back in op.length */
			length += op.length;

			/* nothing was taken: let the other threads run before retrying */
			if (!op.length){
				sched_yield();
			}
		}
		printf("WRITER is done!\n");
	}

	return NULL;
}

/*
 * Start one worker thread per entry of jobs[0..count-1] and wait for them.
 *
 * Only threads that were actually created are joined. Returns 0 when every
 * thread was created, -1 (after printing a message) otherwise.
 */
static int run_stage(pthread_t *threads, job *jobs, int count)
{
	int started, i;
	int err = 0;

	for (started = 0; started < count; started++) {
		err = pthread_create(&threads[started], NULL, worker, &jobs[started]);
		if (err) {
			/* pthread_create returns the error number instead of setting errno */
			fprintf(stderr, "pthread_create failed: %s\n", strerror(err));
			break;
		}
	}

	for (i = 0; i < started; i++) {
		pthread_join(threads[i], NULL);
	}

	return err ? -1 : 0;
}

/*
 * Drive a fixed sequence of stages through the device: BOOLGEN, two
 * COLFILTER passes, JOIN, ALU and two AGGR passes. Each stage fills jobs[]
 * with writer/reader jobs on specific FIFO ids and runs them concurrently.
 * The JOIN result and the aggregation result are printed to stdout.
 *
 * Returns 0 on success, -1 if the device cannot be opened, a buffer cannot
 * be allocated, or a thread cannot be created.
 */
int main(int argc, char * argv[])
{
	/*
	 * The stages below use data[0] .. data[9]. Size the array by the larger
	 * of BUFFERS and that requirement so every index stays in bounds.
	 */
	enum {
		STAGE_BUFFERS = 10,
		NUM_BUFFERS = (BUFFERS > STAGE_BUFFERS) ? BUFFERS : STAGE_BUFFERS
	};

	static const char filename[] = "/dev/dpu";

	int *data[NUM_BUFFERS] = { NULL };
	pthread_t threads[NTHREADS];
	job jobs[NTHREADS];
	int i, k, join_length, aggr_length;
	int rc = -1;

	(void) argc;
	(void) argv;

	dpu_fd = open(filename, O_RDWR);
	if (dpu_fd == -1) {
		fprintf(stderr, "could not open %s\n", filename);
		return -1;
	}

	/* allocate zero-filled input/output buffers */
	for ( i = 0 ; i < NUM_BUFFERS ; i++ ){
		data[i] = calloc(BUFFER_SIZE, sizeof *data[i]);
		if (data[i] == NULL) {
			fprintf(stderr, "could not allocate buffer %d\n", i);
			goto out;
		}
	}

	/* send data to a boolgen to compare two columns */
	printf("BOOLGEN\n");
	jobs[0] = (job) {8 , WRITE, in0, 8};
	jobs[1] = (job) {9 , WRITE, in1, 8};
	jobs[2] = (job) {10, READ , data[0],8};
	if (run_stage(threads, jobs, 3))
		goto out;

	/* use boolgen result to filter original data */
	printf("COLFILTER1\n");
	jobs[0] = (job) {11 , WRITE, data[0], 8};
	jobs[1] = (job) {12 , WRITE, in0, 8};
	jobs[2] = (job) {13, READ , data[1],8};
	if (run_stage(threads, jobs, 3))
		goto out;

	/* use boolgen result to filter a payload column */
	printf("COLFILTER2\n");
	jobs[0] = (job) {11 , WRITE, data[0], 8};
	jobs[1] = (job) {12 , WRITE, in2, 8};
	jobs[2] = (job) {13, READ , data[2],8};
	if (run_stage(threads, jobs, 3))
		goto out;

	/* join with another table using the filter data as candidate */
	printf("JOIN\n");
	jobs[0] = (job) {14 , WRITE, data[1], 6};
	jobs[1] = (job) {15 , WRITE, in3, 10};
	jobs[2] = (job) {16 , WRITE, data[2],6};
	jobs[3] = (job) {17 , WRITE ,in4,10};
	jobs[4] = (job) {18 , READ , data[3],20};
	jobs[5] = (job) {19 , READ , data[4],20};
	jobs[6] = (job) {20 , READ , data[5],20};
	jobs[7] = (job) {21 , READ , data[6],20};
	if (run_stage(threads, jobs, NTHREADS))
		goto out;

	join_length = jobs[7].length - 1; /* remove done packet */

	/*
	 * Clean up done bit data - probably not necessary. Guarded so that a
	 * reader reporting zero elements (or more than the buffer holds) cannot
	 * turn this into an out-of-bounds write.
	 */
	if (join_length >= 0 && join_length < BUFFER_SIZE) {
		data[3][join_length] = 0;
	}

	/* print result of the join */
	for ( i = 0 ; i < join_length+1; i++ ){
		printf("%d\t",data[3][i]);
		printf("%d\t",data[4][i]);
		printf("%d\t",data[5][i]);
		printf("%d\n",data[6][i]);
	}

	/* operate with alu on payload column of foreign table */
	printf("ALU\n");
	jobs[0] = (job) {22 , WRITE, data[3], join_length};
	jobs[1] = (job) {23 , WRITE, data[6], join_length};
	jobs[2] = (job) {24 , READ,  data[7], BUFFER_SIZE};
	if (run_stage(threads, jobs, 3))
		goto out;

	/* finally perform aggregation */
	printf("AGGR 1\n");
	jobs[0] = (job) {25 , WRITE, data[3], join_length+1};
	jobs[1] = (job) {26 , WRITE, data[7], join_length+1};
	jobs[2] = (job) {27 , READ, data[8],BUFFER_SIZE};
	if (run_stage(threads, jobs, 3))
		goto out;

	printf("AGGR 2 (no-op)\n");
	jobs[0] = (job) {25 , WRITE, data[3], join_length+1};
	jobs[1] = (job) {26 , WRITE, data[3], join_length+1};
	jobs[2] = (job) {27 , READ, data[9],BUFFER_SIZE};
	if (run_stage(threads, jobs, 3))
		goto out;

	aggr_length = jobs[2].length - 1; /* remove done packet */

	for ( i = 0 ; i < aggr_length+1; i++ ){
		printf("%d\t",data[9][i]);
		printf("%d\n",data[8][i]);
	}

	printf("DPU Userspace program terminating: %d\n",i);
	rc = 0;

out:
	/* release everything acquired above; free(NULL) is a no-op */
	for ( k = 0 ; k < NUM_BUFFERS ; k++ ){
		free(data[k]);
	}
	close(dpu_fd);
	return rc;
}
