// metasvmkc.cpp 
//
// 
//
//


// Does Multi-Task SVM classification with KERNEL selection
// in a meta-learning scenario. This version assumes that
// the inputs are actually the same for each task and
// only the outputs vary.



#include "stdafx.h"

#ifndef DRAND
#define DRAND ((double)rand()/(double)RAND_MAX)
#endif

#ifndef M_PI
#define M_PI 3.1415927
#endif


#ifndef SQRD
#define SQRD(x) ((x)*(x))
#endif

#ifndef MIN
#define MIN(x,y)  ( (x)<(y)  ?   (x):(y) )
#endif

#ifndef MAX
#define MAX(x,y)  ( (x)>(y)  ?   (x):(y) )
#endif


#define MAXLINE  500000
#define VTINY     1e-9
#define TINY      1e-3

// For Keyboard Interface
int TermMask;
int Key;
int ch;

// Command Line Arguments
char   inputFile[256];
char   taskFile[256];
char   kernelFile[256];
char   extensionFile[256];
char   lambdas_fname[256];
char   model_fname[256];
char   switcher_fname[256];
char   outputs_fname[256];
char   lambdaFile[256];
int    ntrain = -1;
double p     = 1.0;    // probability the feature is on
double c     = 30.0;
double negc  = 10.0;
double sigma = 3.0;
int    iters = -5;
int    meta  = 1; // Coupled feature selection or not...


#define KERNEL_LINEAR   0
#define KERNEL_POLY     1
#define KERNEL_RBF      2
#define KERNEL_ERBF     3
#define KERNEL_SIGMOID  4
#define KERNEL_SELFPOLY 5

int nkernels=1;
int kerneltype = KERNEL_LINEAR;
double p1 = 0.1;
double p2 = 3.0;


double
kernel(Matrix X, int R, Matrix Xt, int C)
{
  int i, D = X->cols;
  double val = 0.0;
  if (kerneltype==KERNEL_LINEAR)
    {
      for (i=0; i<D; i++) val += X->d2[R][i]*Xt->d2[C][i];
    }
  else if (kerneltype==KERNEL_POLY)
    {
      for (i=0; i<D; i++) val += X->d2[R][i]*Xt->d2[C][i];
      val = pow(val+1.0,p1);
    }
  else if (kerneltype==KERNEL_RBF)
    {
      for (i=0; i<D; i++) val += SQRD(X->d2[R][i]-Xt->d2[C][i]);
      val = exp(-val / (2.0*p1*p1));
    }
  else if (kerneltype==KERNEL_ERBF)
    {
      for (i=0; i<D; i++) val += SQRD(X->d2[R][i]-Xt->d2[C][i]);
      val = exp(-sqrt(val) / (2.0*p1*p1));
    }
  else if (kerneltype==KERNEL_SIGMOID)
    {
      for (i=0; i<D; i++) val += X->d2[R][i]*Xt->d2[C][i];
      val = tanh(p1*val/((double) D) + p2);
    }
  else if (kerneltype==KERNEL_SELFPOLY)
    {
      for (i=0; i<D; i++) val += pow(X->d2[R][i]*Xt->d2[C][i]+1.0,p1);
    }

  return(val);
}


double
texp(double c)
{
	if (c<-500.0)
	{
		c = -500.0;
	}
	else if (c>200.0)
	{
		fprintf(stderr,"OVERFLOWING exp(%e)\n",c);
	}
	return(exp(c));
}

double
logZgammaT(double c, double lt)
{
	return( lt + log(1.0-lt/c) );
}

// Data, Parameters and Costs
int    N, D, M; // N = # of data points, D = dimensions, M = # of tasks
Matrix X, Xt;
Matrix HMMi;
Matrix HMMv;
Matrix HMMt;
Volume K;
Matrix Y, Yt;
Matrix W;
Vector TW;
Matrix lambdas;
Matrix oldLambdas;
Vector lZtheta;
Matrix lZgamma;

Vector lZb1;
Vector bias;
double lZb;
double lZgam;
double lZt;
Matrix switcher;



double
logPartitionFast(int INDEX, int TASK, double newl, double oldl)
{
	int d;
	int I;
	double val=0.0;
	
	I = INDEX;
	lZgam += lZgamma->d2[TASK][I];
	lZgamma->d2[TASK][I] = logZgammaT(c,newl);
	lZgam -= lZgamma->d2[TASK][I];
	


	if (meta)
	{
		for (d=0; d<nkernels; d++)
		{			
			double H11 = K->d3[d][I][I];
			double H12 = 0.0;
			int i;
			for (i=0;i<N;i++)
			{
				if (i!=I) H12 += lambdas->d2[TASK][i]*Y->d2[TASK][i]*K->d3[d][I][i];
			}
			H12 *= Y->d2[TASK][I];

			TW->d[d]       -= W->d2[TASK][d];
			W->d2[TASK][d] -= SQRD(oldl)*H11+2.0*oldl*H12;
			W->d2[TASK][d] += SQRD(newl)*H11+2.0*newl*H12;
			TW->d[d]       += W->d2[TASK][d];
			lZtheta->d[d]   = 0.5*TW->d[d] + log(p + (1.0-p)*texp(-0.5*TW->d[d]));
		}
	}
	else
	{
		for (d=0; d<nkernels; d++)
		{
			double H11 = K->d3[d][I][I];
			double H12 = 0.0;
			int i;
			for (i=0;i<N;i++)
			{
				if (i!=I) H12 += lambdas->d2[TASK][i]*Y->d2[TASK][i]*K->d3[d][I][i];
			}
			H12 *= Y->d2[TASK][I];

			lZtheta->d[d]  -= 0.5*W->d2[TASK][d] + log(p+(1.0-p)*texp(-0.5*W->d2[TASK][d]));
			W->d2[TASK][d] -= SQRD(oldl)*H11+2.0*oldl*H12;
			W->d2[TASK][d] += SQRD(newl)*H11+2.0*newl*H12;
			lZtheta->d[d]  += 0.5*W->d2[TASK][d] + log(p+(1.0-p)*texp(-0.5*W->d2[TASK][d]));
		}
	}
	
	lZb           -= 0.5*sigma*SQRD(lZb1->d[TASK]);
	lZb1->d[TASK] += (newl-oldl)*Y->d2[TASK][I];
	lZb           += 0.5*sigma*SQRD(lZb1->d[TASK]);
	lZt   = VectorSum(lZtheta);
	val   = lZgam+lZt+lZb;
	return(val);
}

double
logPartition(Matrix l)
{
	int i,j,m,d;
	double total=0.0;
	
	for (m=0; m<M; m++)
	{
		for (i=0; i<N; i++) lZgamma->d2[m][i] = logZgammaT(c,l->d2[m][i]);
	}
	lZgam = -VectorSum((Vector) lZgamma);
	
	lZb = 0.0;
	for (m=0; m<M; m++)
	{
		double val = 0.0;
		for (i=0; i<N; i++) val += l->d2[m][i]*Y->d2[m][i];
		lZb1->d[m] = val;
		lZb   += 0.5*sigma*val*val;
	}
	

	if (meta)
	{
		for (d=0; d<nkernels; d++)
		{
			TW->d[d] = 0.0;
			for (m=0; m<M; m++)
			{
				double val = 0.0;
				for (i=0; i<N; i++)
					for (j=0; j<N; j++)
						val += l->d2[m][i]*Y->d2[m][i]*l->d2[m][j]*Y->d2[m][j]*K->d3[d][i][j];
				W->d2[m][d] = val; // W IS PRE SQUARED DUE TO THE KERNEL TRICK!!!
				TW->d[d] += val;
			}
			lZtheta->d[d] = 0.5*TW->d[d] + log(p + (1.0-p)*texp(-0.5*TW->d[d]));
		}
	}
	else
	{
		for (d=0; d<nkernels; d++)
		{
			lZtheta->d[d] = 0.0;
			for (m=0; m<M; m++)
			{
				double val = 0.0;
				for (i=0; i<N; i++) 
					for (j=0; j<N; j++)
						val += l->d2[m][i]*Y->d2[m][i]*l->d2[m][j]*Y->d2[m][j]*K->d3[d][i][j];
				W->d2[m][d] = val; // W IS PRE SQUARED DUE TO THE KERNEL TRICK!!!
				lZtheta->d[d] += 0.5*W->d2[m][d] + log(p+(1.0-p)*texp(-0.5*W->d2[m][d]));
			}
		}
	}

	lZt   = VectorSum(lZtheta);
	total = lZgam+lZt+lZb;
	return(total);
}

#ifndef MIN
#define MIN(x,y)  ( (x)<(y)  ?   (x):(y) )
#endif

#ifndef MAX
#define MAX(x,y)  ( (x)>(y)  ?   (x):(y) )
#endif

void
getModel(Matrix l)
{
  int i, m, d;
  for (m=0; m<M; m++)
  {
	  double b=0;
	  for (i=0; i<N; i++) b += sigma*(l->d2[m][i]*Y->d2[m][i]);
	  bias->d[m] = b;
	  
	  if (meta)
	  {
		  for (d=0; d<nkernels; d++)
		  {
			  switcher->d2[m][d] = p / (p + (1.0-p)*( texp(-0.5*TW->d[d] ) ));
		  }
	  }
	  else
	  {	   
		  for (d=0; d<nkernels; d++)
		  {
			  switcher->d2[m][d] = p / (p + (1.0-p)*( texp(-0.5*W->d2[m][d] ) ));
		  }
	  }
  }
}

void
parse_args (int argc, char **argv)
{
  int i;
  strcpy(taskFile,"");
  strcpy(inputFile,"");
  strcpy(extensionFile,"ext");
  strcpy(kernelFile,"kernel");
  strcpy(lambdas_fname,"lambdas.");
  strcpy(model_fname,"model.");
  strcpy(switcher_fname,"switcher.");
  strcpy(outputs_fname,"outputs.");
  strcpy(lambdaFile,"");
  
  for(i=1; i<argc; i++)
  {
      if (!strcmp(argv[i], "-input"))
		  strcpy(inputFile, argv[++i]);
      else if (!strcmp(argv[i], "-task"))
		  strcpy(taskFile, argv[++i]);
	  else if (!strcmp(argv[i], "-kernel"))
		  strcpy(kernelFile, argv[++i]);
      else if (!strcmp(argv[i], "-p"))
		  p = atof(argv[++i]);
      else if (!strcmp(argv[i], "-ext"))
		  strcpy(extensionFile, argv[++i]);
      else if (!strcmp(argv[i], "-c"))
	  {
		  c = atof(argv[++i]);
		  if (c<0)
		  {
			  negc =  c;
			  c    =  1.33333333333;
		  }
	  }
      else if (!strcmp(argv[i], "-sigma"))
		  sigma = atof(argv[++i]);
      else if (!strcmp(argv[i], "-ntrain"))
		  ntrain = atoi(argv[++i]);
      else if (!strcmp(argv[i], "-lambda"))
		  strcpy(lambdaFile, argv[++i]);
      else if (!strcmp(argv[i], "-iters"))
		  iters = atoi(argv[++i]);
	  else if (!strcmp(argv[i], "-nonmeta"))
	  {
		  fprintf(stderr,"Meta learning is turned off, all models have own feature selection.\n");
		  meta = 0;
	  }
      else
	{
	  fprintf(stderr,"Usage: %s -input <fname> -task <fname> -kernel <fname> \n",argv[0]);
	  fprintf(stderr,"    -ntrain <int> -ext <fname> -iters <int> -lambda <fname> \n");
	  fprintf(stderr,"    -sigma <double> -p <double> -c <double> -nonmeta \n");
	  fprintf(stderr,"Multitask SVM Feature Selection for Kernel Classification.\n");
	  exit (2);
	}
    }

  strcat(lambdas_fname,extensionFile);
  strcat(model_fname,extensionFile);
  strcat(switcher_fname,extensionFile);
  strcat(outputs_fname,extensionFile);
}


double oldObjective, newObjective, oldObjectiveY;
int    lambdaIndex = -100;
int    oldDir      = -100;
int    dirM        = 0;
int    dirN        = 0;


double
costIF(double lscale)
{
  double changeObjective = -oldObjective;
  int    d=0;
  double lnew,lold,WNEW,TWNEW;

  lold  = oldLambdas->d2[dirM][dirN];
  lnew  = lold + lscale;

  // logZgamma
  changeObjective += logZgammaT(c,lold);
  changeObjective -= logZgammaT(c,lnew);


  // logZb
  changeObjective -= 0.5*sigma*SQRD(lZb1->d[dirM]);
  changeObjective += 0.5*sigma*SQRD(lZb1->d[dirM] - lold*Y->d2[dirM][dirN] + lnew*Y->d2[dirM][dirN]);
  
  // logZetheta  

  if (meta)
  {
	  for (d=0; d<nkernels; d++)
	  { 		  
		  double H11 = K->d3[d][dirN][dirN];
		  double H12 = 0.0;
		  int i;
		  for (i=0;i<N;i++)
		  {
			  if (i!=dirN) H12 += lambdas->d2[dirM][i]*Y->d2[dirM][i]*K->d3[d][dirN][i];
		  }
		  H12 *= Y->d2[dirM][dirN];

		  changeObjective -= lZtheta->d[d];
		  TWNEW  = TW->d[d]-W->d2[dirM][d];
		  WNEW   = W->d2[dirM][d];
		  WNEW  -= SQRD(lold)*H11+2.0*lold*H12;
		  WNEW  += SQRD(lnew)*H11+2.0*lnew*H12;
		  TWNEW += WNEW;
		  changeObjective += 0.5*TWNEW + log(p + (1.0-p)*texp(-0.5*TWNEW));
	  }
  }
  else
  {
	  for (d=0; d<nkernels; d++)
	  { 
		  double H11 = K->d3[d][dirN][dirN];
		  double H12 = 0.0;
		  int i;
		  for (i=0;i<N;i++)
		  {
			  if (i!=dirN) H12 += lambdas->d2[dirM][i]*Y->d2[dirM][i]*K->d3[d][dirN][i];
		  }
		  H12 *= Y->d2[dirM][dirN];

		  WNEW   = W->d2[dirM][d];
		  changeObjective -= 0.5*WNEW + log(p + (1.0-p)*texp(-0.5*WNEW));
		  WNEW   = W->d2[dirM][d];
		  WNEW  -= SQRD(lold)*H11+2.0*lold*H12;
		  WNEW  += SQRD(lnew)*H11+2.0*lnew*H12;
		  changeObjective += 0.5*WNEW + log(p + (1.0-p)*texp(-0.5*WNEW));
	  }
  }

  return(changeObjective);
}


#define ITMAX 100
#define CGOLD 0.3819660
#define ZEPS 1.0e-10
#define SHFT(a,b,c,d) (a)=(b);(b)=(c);(c)=(d);
#define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))

// This will minimize the function in the interval ax to cx

double
brent(double ax,double bx,double cx,double (*f)(double),double tol,double *xmin)
{
  int iter;
  double a,b,d,etemp,fu,fv,fw,fx,p,q,r,tol1,tol2,u,v,w,x,xm;
  double e=0.0;
  
  a=(ax < cx ? ax : cx);
  b=(ax > cx ? ax : cx);
  x=w=v=bx;
  fw=fv=fx=(*f)(x);
  for (iter=1;iter<=ITMAX;iter++) {
    xm=0.5*(a+b);
    tol2=2.0*(tol1=tol*fabs(x)+ZEPS);
    if (fabs(x-xm) <= (tol2-0.5*(b-a))) {
      *xmin=x;
      return fx;
    }
    if (fabs(e) > tol1) {
      r=(x-w)*(fx-fv);
      q=(x-v)*(fx-fw);
      p=(x-v)*q-(x-w)*r;
      q=2.0*(q-r);
      if (q > 0.0) p = -p;
      q=fabs(q);
      etemp=e;
      e=d;
      if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x))
	d=CGOLD*(e=(x >= xm ? a-x : b-x));
      else {
	d=p/q;
	u=x+d;
	if (u-a < tol2 || b-u < tol2)
	  d=SIGN(tol1,xm-x);
      }
    } else {
      d=CGOLD*(e=(x >= xm ? a-x : b-x));
    }
    u=(fabs(d) >= tol1 ? x+d : x+SIGN(tol1,d));
    fu=(*f)(u);
    if (fu <= fx) {
      if (u >= x) a=x; else b=x;
      SHFT(v,w,x,u)
	SHFT(fv,fw,fx,fu)
	} else {
	  if (u < x) a=u; else b=u;
	  if (fu <= fw || w == x) {
	    v=w;
	    w=u;
	    fv=fw;
	    fw=fu;
	  } else if (fu <= fv || v == x || v == w) {
	    v=u;
	    fv=fu;
	  }
	}
  }
  *xmin=x;
  return fx;
}
#undef ITMAX
#undef CGOLD
#undef ZEPS
#undef SHFT
#undef SIGN


double ACC_1  = 0.0;
double ACC_t  = 0.0;
double ACC_tt = 0.0;
double ACC_l  = 0.0;
double ACC_lt = 0.0;
double PAR_M  = 0.0;
double PAR_B  = 0.0;



void
main(int argc, char *argv[]) 
{
  int     i,j,k;
  FILE    *fp;
  char    line[MAXLINE+1];
  char    b2[50];
  int     stop = -1;
  int     dump = -1;
  int     NN;

  // Get Command Line Arguments
  parse_args(argc,argv);

  // Open the taskFile to Determine number of tasks
  if (strlen(taskFile)>1)
  {
	  if ((fp = fopen(taskFile,"r"))==NULL)
	  {
		  fprintf(stderr,"%s can't open %s for input.\n",argv[0],taskFile);
		  exit(0);
	  }
	  else
	  {
		  fgets(line,MAXLINE,fp);
		  istrstream ist(line,strlen(line));
		  M = 0;
		  while (ist>>b2) M++;
		  fclose(fp);
	  }
  }
  else
  {
	  // no task file so the labels must be the last column
	  // of the input file and M=1
	  M = 1;
	  printf("No task file was specified, assuming labels are last column of input file.\n");
  }
  
  
  // Open the inputFile to Determine its Dimensions
  if ((fp = fopen(inputFile,"r"))==NULL)
  {
      fprintf(stderr,"%s can't open %s for input.\n",argv[0],inputFile);
      exit(0);
  }
  else
  {
	  fgets(line,MAXLINE,fp);
	  istrstream ist(line,strlen(line));
	  D = 0;
	  while (ist>>b2) D++;
	  NN = 1;
	  while (fgets(line, MAXLINE, fp) != NULL) 
		  if (strlen(line)>2) NN++;
		  fclose(fp);
  }
  if (strlen(taskFile)<2) D=D-1;

  if (ntrain>0) N = ntrain;
  else N = NN;

  // Allocate the Right Size Arrays
  X             = MatrixCreate(N,D);
  HMMv          = MatrixCreate(N*M,20);
  HMMi          = MatrixCreate(N*M,20);
  HMMt          = MatrixCreate(N*M,20);
  VectorSet((Vector) HMMi,-1.0);
  VectorSet((Vector) HMMv,-1.0);
  VectorSet((Vector) HMMt,-1.0);
  Xt            = MatrixCreate(NN-N,D);  // contains test data...
  Y             = MatrixCreate(M,N);
  Yt            = MatrixCreate(M,NN-N);
  lambdas       = MatrixCreate(M,N);
  oldLambdas    = MatrixCreate(M,N);


  // Read in the inputFile and Load it into X
  fp = fopen(inputFile,"r");
  for (i=0; i<N; i++)
  {
      fgets(line, MAXLINE, fp);
      istrstream ist(line,strlen(line));
      for (j=0; j<D; j++)
	  {
		  ist>>b2;
		  X->d2[i][j] = atof(b2);
	  }
	  if (strlen(taskFile)<2)
	  {
		  ist>>b2;
		  Y->d2[0][i] = atof(b2);
	  }
  } 
  for (i=0; i<(NN-N); i++)
  {
      fgets(line, MAXLINE, fp);
      istrstream ist(line,strlen(line));
      for (j=0; j<D; j++)
	  {
		  ist>>b2;
		  Xt->d2[i][j] = atof(b2);
	  }
	  if (strlen(taskFile)<2)
	  {
		  ist>>b2;
		  Yt->d2[0][i] = atof(b2);
	  }
  }
  fclose(fp);

  
  if (strlen(taskFile)>1)
  {
	  // Read in the taskFile and Load it into Y
	  fp = fopen(taskFile,"r");
	  for (i=0; i<N; i++)
	  {
		  fgets(line, MAXLINE, fp);
		  istrstream ist(line,strlen(line));
		  for (j=0; j<M; j++)
		  {
			  ist>>b2;
			  Y->d2[j][i] = atof(b2);
		  }
	  } 
	  for (i=0; i<(NN-N); i++)
	  {
		  fgets(line, MAXLINE, fp);
		  istrstream ist(line,strlen(line));
		  for (j=0; j<M; j++)
		  {
			  ist>>b2;
			  Yt->d2[j][i] = atof(b2);
		  };
	  }
	  fclose(fp);
  }


  // Open the kernelFile to Determine number of kernels
  if ((fp = fopen(kernelFile,"r"))==NULL)
  {
      fprintf(stderr,"%s can't open %s for input.\n",argv[0],kernelFile);
      exit(0);
  }
  else
  {
	  nkernels=0;
	  while (fgets(line,MAXLINE,fp)!=NULL) nkernels++;
	  fclose(fp);
	  fprintf(stderr,"Number of kernels=%d\n",nkernels);

	  K = VolumeCreate(nkernels,NN,N);
	  fp = fopen(kernelFile,"r");
	  int i,m,n;
	  for (i=0;i<nkernels;i++)
	  {
		  fgets(line,MAXLINE,fp);
		  istrstream ist(line,strlen(line));
		  ist>>b2; kerneltype = atoi(b2);
		  ist>>b2; p1 = atof(b2);
		  ist>>b2; p2 = atof(b2);
		  fprintf(stderr,"ker=%d p1=%e p2=%e\n",kerneltype,p1,p2);
		  for (m=0;m<NN;m++)
			  for (n=0;n<N;n++)
			  {
				  if (m<N) K->d3[i][m][n] = kernel(X,m,X,n);
				  else if (m>=N) K->d3[i][m][n] = kernel(Xt,m-N,X,n);
			  }
	  }
	  fclose(fp);
  }


	  printf("N4.\n");

  // Allocate computational structures
  lZtheta       = VectorCreate(nkernels);
  lZgamma       = MatrixCreate(M,N);
  W             = MatrixCreate(M,nkernels);
  TW            = VectorCreate(nkernels);
  switcher      = MatrixCreate(M,nkernels);
  lZb1          = VectorCreate(M);
  bias          = VectorCreate(M);
  Vector extent = VectorCreate(2);
  

  // Must Learn Lambdas
  VectorSet((Vector) lambdas,0.0001);

  if (strlen(lambdaFile)>2)
  {
      fp = fopen(lambdaFile,"r");
      for (i=0; i<M; i++)
	  {
		  for (j=0; j<N; j++)
		  {
			  fgets(line,MAXLINE,fp);
			  lambdas->d2[i][j] = atof(line);
			  if (lambdas->d2[i][j]<0.0) lambdas->d2[i][j] = 0.0;
			  if (lambdas->d2[i][j]>=(c-TINY)) lambdas->d2[i][j] = c-TINY;
		  }
	  }
      fclose(fp);
  }

  VectorMove((Vector) oldLambdas,(Vector) lambdas);
  VectorSet((Vector) W,0.0);
  VectorSet(TW, 0.0);
  VectorSet(lZtheta,0.0);
  VectorSet((Vector) lZgamma,0.0);
  VectorSet(lZb1,0.0);
  lZb = 0.0;
  lZt = 0.0;





  // Setup
  oldObjective     = -logPartition(lambdas);
  fprintf(stderr,"Initial Objective = %e p=%e\n", oldObjective,p);
  int    direction = 0;
  int    itr       = 1;
  double tolerance = 1e-10;
  double increase  = 0.0;
  int    past      = HMMi->cols;
  double lastCheck = -2.0*fabs(oldObjective);

  if (iters < 0) iters = 64000000;
  


  while (stop<0)
    { 
      // Grab from Keyboard
      if (_kbhit()>0)
	  {
		  Key = _getch();
		  if (Key=='q') stop = 1;
		  if (Key=='s') dump = 1;
	  }
      

      itr++;

	  
      if (itr>iters) stop = 1;



      // Pick a direction to optimize
      double selection = texp(- ((double) itr)/((double) N*M)) + 0.01;
      if ((DRAND>0.2) && (oldDir>=0))
	  {
		  // pick an HMM direction
		  double accV    =  0.0;
		  double sumV    =  0.0;
		  double maxV    = -1e30;
		  int    maxJ    =  0;
		  double value   =  0.0;
		  double decay   =  0.0;
		  double curprob =  0.0;
		  double totprob =  0.0;
		  int    sample  = -100;
		  for (j=0; j<past; j++) 
			  if ((HMMi->d2[oldDir][j]>=0) && (HMMi->d2[oldDir][j]!=oldDir))
			  {
				  value  = (HMMv->d2[oldDir][j] - HMMt->d2[oldDir][j]*PAR_M - PAR_B);
				  if (value>maxV)
				  {
					  maxV = value;
					  maxJ = j;
				  }
				  sumV  += value;
				  accV  += 1.0;
			  }
			  if (accV>0.0)
			  {
				  decay = accV / (maxV*accV - sumV);
				  for (j=0; j<past; j++) 
					  if ((HMMi->d2[oldDir][j]>=0) && (HMMi->d2[oldDir][j]!=oldDir))
					  {
						  value    = (HMMv->d2[oldDir][j] - HMMt->d2[oldDir][j]*PAR_M - PAR_B);
						  totprob += decay*texp(-decay*(maxV-value));
					  }
					  totprob *= DRAND;
					  for (j=0; j<past; j++) 
						  if ((HMMi->d2[oldDir][j]>=0) && (HMMi->d2[oldDir][j]!=oldDir))
						  {
							  value    = (HMMv->d2[oldDir][j] - HMMt->d2[oldDir][j]*PAR_M - PAR_B);
							  curprob += decay*texp(-decay*(maxV-value));
							  if ((sample<0) && (curprob>=totprob)) sample = j;
						  }
						  direction = (int) HMMi->d2[oldDir][sample];
						  direction = (int) HMMi->d2[oldDir][maxJ];
						  if (direction>=(N*M)) direction = N*M-1;
						  if (direction<0) direction = 0;
			  }
			  else
			  {
				  // pick a random direction
				  direction = (int) (DRAND*(((double) N*M)+1.0));
				  if (direction>=(N*M)) direction = N*M-1;
				  if (direction<0) direction = 0;
			  }
	  }
      else
	  {
		  // pick a random direction
		  direction = (int) (DRAND*(((double) N*M)+1.0));
		  if (direction>=(N*M)) direction = N*M-1;
		  if (direction<0) direction = 0;
	  }
	  



	  // Convert direction into dirN and dirM
	  dirM = direction/(N);
	  dirN = direction-N*dirM;

      // Do line search
      double lnow;
      lnow = oldLambdas->d2[dirM][dirN];
      lambdaIndex = direction;
      double minf,xmin;
      minf = brent(-lnow+VTINY,0.0,c-lnow-VTINY, &costIF, tolerance, &xmin);



      // Update cost function and vector of lambdas
      increase = -oldObjective;
	  oldObjective = -logPartitionFast(dirN,dirM,oldLambdas->d2[dirM][dirN]+xmin,oldLambdas->d2[dirM][dirN]);
	  oldLambdas->d2[dirM][dirN] += xmin;
	  if (oldLambdas->d2[dirM][dirN]<0.0) fprintf(stderr,"bad dirm=%d dirn=%d\n",dirM,dirN); // oldLambdas->d2[dirM][dirN]=0.0;
	  lambdas->d2[dirM][dirN]     = oldLambdas->d2[dirM][dirN];
	  

	  // double slowobj = -logPartition(lambdas);
	  // fprintf(stderr,"fast=%e slow=%e diff=%e\n",oldObjective,slowobj, oldObjective-slowobj);

      increase += oldObjective;
      if ((itr%(N+100))==0)
	  {
		  printf("%e %d\n",oldObjective,itr); 
		  fflush(stdout); 
	  }
      if ((itr%(10*N))==0)
	  {
		  	if (((oldObjective-lastCheck)<=(0.00000002)*fabs(oldObjective)))
			{
				if (negc<0)
				{
					if (c>fabs(negc))
						stop = 1;
					else
					{
						c = c*1.2247;
						printf("JUST SWITCHED TO c=%e\n",c); 
						dump = 1;
					}
				}
				else stop = 1;
			}
			lastCheck = oldObjective;
	  }
      

      // Add to the history model the current data point. Replace the least valuable previous one.
      if (oldDir>=0)
	  {
		  int worst = -5;
		  double t;
		  double value = 0.0;
		  double lowestvalue = 1e30;
		  for (j=0; j<past; j++) if (HMMi->d2[oldDir][j]==lambdaIndex) worst = j;
		  if (worst<0) 
		  {
			  for (j=0; j<past; j++) if (HMMi->d2[oldDir][j]<0) worst = j;
		  }
		  if (worst<0)
		  {
			  for (j=0; j<past; j++)
			  {
				  value = HMMv->d2[oldDir][j] - HMMt->d2[oldDir][j]*PAR_M - PAR_B;
				  if (value<lowestvalue)
				  {
					  worst       = j;
					  lowestvalue = value;
				  }
			  }
		  }
		  if (worst<0) worst = 0;
		  HMMi->d2[oldDir][worst] = lambdaIndex;
		  HMMv->d2[oldDir][worst] = log(MAX(increase,1e-10));
		  HMMt->d2[oldDir][worst] = itr;
		  t = itr;
		  ACC_1  = t;
		  ACC_t  = ACC_t + t;
		  ACC_tt = ACC_tt + t*t;
		  ACC_l  = ACC_l + log(MAX(increase,1e-10));
		  ACC_lt = ACC_lt + log(MAX(increase,1e-10))*t;
		  PAR_M  = (ACC_lt - ACC_t*ACC_l/ACC_1) / (ACC_tt - ACC_t*ACC_t/ACC_1);
		  PAR_B  = (ACC_l - PAR_M*ACC_t) / ACC_1;
	  }
      oldDir = lambdaIndex;





      // Check if any output needs to be generated
      if ((stop==1) || (dump==1))
	  { 
		  // Save the lambdas
		  fp = fopen(lambdas_fname,"w");
		  MatrixPrint(fp,lambdas);
		  fclose(fp);
		  
		  // Compute the actual final linear model and bias
		  newObjective = -logPartition(lambdas);
		  FILE *fplog = fopen("log","a");
		  for (k=0; k<argc; k++) fprintf(fplog,"%s ",argv[k]);
		  fprintf(fplog,"\n");
		  fprintf(stderr,"J=%e i=%d ",newObjective,itr);
		  getModel(lambdas);
		  fp = fopen(model_fname,"w");
		  VectorPrint(fp,bias);
		  fprintf(fp,"\n");
		  fclose(fp);
		  fp = fopen(switcher_fname,"w");
		  MatrixPrint(fp,switcher);
		  fclose(fp);
		  fprintf(fp,"\n");
		  
		  // Recompute the newoutputs and the prediction error
		  int    d,m;
		  double val;
		  double err1 = 0.0;
		  double err2 = 0.0; 
		  fp = fopen(outputs_fname,"w");
		  for (k=0; k<N; k++)
		  {
			  for (m=0; m<M; m++)
			  {
				  val = bias->d[m];
				  for (d=0; d<nkernels; d++)
					  for (j=0; j<N; j++) val += switcher->d2[m][d]*(lambdas->d2[m][j]*Y->d2[m][j]*K->d3[d][k][j]);
				  fprintf(fp,"%e ",val);
				  if ((val*Y->d2[m][k])<=0.0) err1++;
			  }
			  fprintf(fp,"\n");
		  }
		  fprintf(fplog,"%d %e %e %e ",meta,c,p,err1);
		  fprintf(stderr,"c=%e err1=%e ",c,err1);
		  for (k=0; k<(NN-N); k++)
		  {
			  for (m=0; m<M; m++)
			  {
				  val = bias->d[m];
				  for (d=0; d<nkernels; d++)
					  for (j=0; j<N; j++) val += switcher->d2[m][d]*(lambdas->d2[m][j]*Y->d2[m][j]*K->d3[d][k+N][j]);
				  fprintf(fp,"%e ",val);
				  if ((val*Yt->d2[m][k])<=0.0) err2++;
			  }
			  fprintf(fp,"\n");
		  }
		  fprintf(fplog,"%e \n",err2);
		  fprintf(stderr,"err2=%e \n",err2);

		  fclose(fplog);
		  fclose(fp);
		  
		  dump = -1;
	  }

    }

	  

  // Free Arrays
  MatrixFree(X);
  MatrixFree(HMMv);
  MatrixFree(HMMi);
  MatrixFree(HMMt);
  MatrixFree(Xt);
  MatrixFree(Y);
  MatrixFree(Yt);
  VolumeFree(K);
  MatrixFree(lambdas);
  MatrixFree(oldLambdas);
  VectorFree(lZtheta);
  MatrixFree(lZgamma);
  MatrixFree(W);
  VectorFree(TW);
  MatrixFree(switcher);
  VectorFree(lZb1);
  VectorFree(bias);
  VectorFree(extent);
}


