// metasvmc.cpp 
//
// 
//
//


// Does Multi-Task SVM classification with feature selection
// in a meta-learning scenario. This version assumes that
// the inputs are actually the same for each task and
// only the outputs vary.



#include "stdafx.h"

#ifndef DRAND
#define DRAND ((double)rand()/(double)RAND_MAX)
#endif

#ifndef M_PI
#define M_PI 3.1415927
#endif


#ifndef SQRD
#define SQRD(x) ((x)*(x))
#endif



#define MAXLINE  500000
#define VTINY     1e-9
#define TINY      1e-3

// For Keyboard Interface
int TermMask;
int Key;
int ch;

// Command Line Arguments
char   inputFile[256];
char   taskFile[256];
char   extensionFile[256];
char   lambdas_fname[256];
char   model_fname[256];
char   switcher_fname[256];
char   outputs_fname[256];
char   lambdaFile[256];
int    ntrain = -1;
double p     = 1.0;    // probability the feature is on
double c     = 30.0;
double negc  = 10.0;
double sigma = 3.0;
int    iters = -5;
int    meta  = 1; // Coupled feature selection or not...

double
texp(double c)
{
	if (c<-500.0)
	{
		c = -500.0;
	}
	else if (c>200.0)
	{
		fprintf(stderr,"OVERFLOWING exp(%e)\n",c);
	}
	return(exp(c));
}

double
logZgammaT(double c, double lt)
{
	return( lt + log(1.0-lt/c) );
}

// Data, Parameters and Costs
int    N, D, M; // N = # of data points, D = dimensions, M = # of tasks
Matrix X, Xt;
Matrix HMMi;
Matrix HMMv;
Matrix HMMt;
Matrix Y, Yt;
Matrix W;
Vector TW;
Matrix lambdas;
Matrix oldLambdas;
Vector lZtheta;
Matrix lZgamma;

Vector lZb1;
Vector bias;
double lZb;
double lZgam;
double lZt;
Matrix linear;
Matrix switcher;



double
logPartitionFast(int INDEX, int TASK, double newl, double oldl)
{
	int d;
	int I;
	double val=0.0;
	
	I = INDEX;
	lZgam += lZgamma->d2[TASK][I];
	lZgamma->d2[TASK][I] = logZgammaT(c,newl);
	lZgam -= lZgamma->d2[TASK][I];
	
	if (meta)
	{
		for (d=0; d<D; d++)
		{
			TW->d[d]       -= SQRD(W->d2[TASK][d]);
			W->d2[TASK][d] += (newl-oldl)*Y->d2[TASK][I]*X->d2[I][d];
			TW->d[d]       += SQRD(W->d2[TASK][d]);
			lZtheta->d[d]   = 0.5*TW->d[d] + log(p + (1.0-p)*texp(-0.5*TW->d[d]));
		}
	}
	else
	{
		for (d=0; d<D; d++)
		{
			lZtheta->d[d]  -= 0.5*SQRD(W->d2[TASK][d]) + log(p+(1.0-p)*texp(-0.5*SQRD(W->d2[TASK][d])));
			W->d2[TASK][d] += (newl-oldl)*Y->d2[TASK][I]*X->d2[I][d];
			lZtheta->d[d]  += 0.5*SQRD(W->d2[TASK][d]) + log(p+(1.0-p)*texp(-0.5*SQRD(W->d2[TASK][d])));
		}
	}
	
	lZb           -= 0.5*sigma*SQRD(lZb1->d[TASK]);
	lZb1->d[TASK] += (newl-oldl)*Y->d2[TASK][I];
	lZb           += 0.5*sigma*SQRD(lZb1->d[TASK]);
	lZt   = VectorSum(lZtheta);
	val   = lZgam+lZt+lZb;
	return(val);
}

double
logPartition(Matrix l)
{
	int i,m,d;
	double total=0.0;
	
	for (m=0; m<M; m++)
	{
		for (i=0; i<N; i++) lZgamma->d2[m][i] = logZgammaT(c,l->d2[m][i]);
	}
	lZgam = -VectorSum((Vector) lZgamma);
	
	lZb = 0.0;
	for (m=0; m<M; m++)
	{
		double val = 0.0;
		for (i=0; i<N; i++) val += l->d2[m][i]*Y->d2[m][i];
		lZb1->d[m] = val;
		lZb   += 0.5*sigma*val*val;
	}
	

	if (meta)
	{
		for (d=0; d<D; d++)
		{
			TW->d[d] = 0.0;
			for (m=0; m<M; m++)
			{
				double val = 0.0;
				for (i=0; i<N; i++) val += (l->d2[m][i]*Y->d2[m][i])*X->d2[i][d];
				W->d2[m][d] = val;
				TW->d[d] += SQRD(val);
			}
			lZtheta->d[d] = 0.5*TW->d[d] + log(p + (1.0-p)*texp(-0.5*TW->d[d]));
		}
	}
	else
	{
		for (d=0; d<D; d++)
		{
			lZtheta->d[d] = 0.0;
			for (m=0; m<M; m++)
			{
				double val = 0.0;
				for (i=0; i<N; i++) val += (l->d2[m][i]*Y->d2[m][i])*X->d2[i][d];
				W->d2[m][d] = val;
				lZtheta->d[d] += 0.5*SQRD(W->d2[m][d]) + log(p+(1.0-p)*texp(-0.5*SQRD(W->d2[m][d])));
			}
		}
	}

	lZt   = VectorSum(lZtheta);
	total = lZgam+lZt+lZb;
	return(total);
}

#ifndef MIN
#define MIN(x,y)  ( (x)<(y)  ?   (x):(y) )
#endif

#ifndef MAX
#define MAX(x,y)  ( (x)>(y)  ?   (x):(y) )
#endif

void
getModel(Matrix l, Matrix linear, Vector bias)
{
  int i, m, d;
  for (m=0; m<M; m++)
  {
	  double b=0;
	  for (i=0; i<N; i++) b += sigma*(l->d2[m][i]*Y->d2[m][i]);
	  bias->d[m] = b;
	  
	  if (meta)
	  {
		  for (d=0; d<D; d++)
		  {
			  linear->d2[m][d]   = p*W->d2[m][d] / (p + (1.0-p)*( texp(-0.5*TW->d[d] ) ));
			  switcher->d2[m][d] = p / (p + (1.0-p)*( texp(-0.5*TW->d[d] ) ));
		  }
	  }
	  else
	  {	   
		  for (d=0; d<D; d++)
		  {
			  linear->d2[m][d]   = p*W->d2[m][d] / (p + (1.0-p)*( texp(-0.5*SQRD(W->d2[m][d]) ) ));
			  switcher->d2[m][d] = p / (p + (1.0-p)*( texp(-0.5*SQRD(W->d2[m][d]) ) ));
		  }
	  }
  }
  
}

void
parse_args (int argc, char **argv)
{
  int i;
  strcpy(taskFile,"");
  strcpy(inputFile,"");
  strcpy(extensionFile,"ext");
  strcpy(lambdas_fname,"lambdas.");
  strcpy(model_fname,"model.");
  strcpy(switcher_fname,"switcher.");
  strcpy(outputs_fname,"outputs.");
  strcpy(lambdaFile,"");
  
  for(i=1; i<argc; i++)
  {
      if (!strcmp(argv[i], "-input"))
		  strcpy(inputFile, argv[++i]);
      else if (!strcmp(argv[i], "-task"))
		  strcpy(taskFile, argv[++i]);
      else if (!strcmp(argv[i], "-p"))
		  p = atof(argv[++i]);
      else if (!strcmp(argv[i], "-ext"))
		  strcpy(extensionFile, argv[++i]);
      else if (!strcmp(argv[i], "-c"))
	  {
		  c = atof(argv[++i]);
		  if (c<0)
		  {
			  negc =  c;
			  c    =  1.33333333333;
		  }
	  }
      else if (!strcmp(argv[i], "-sigma"))
		  sigma = atof(argv[++i]);
      else if (!strcmp(argv[i], "-ntrain"))
		  ntrain = atoi(argv[++i]);
      else if (!strcmp(argv[i], "-lambda"))
		  strcpy(lambdaFile, argv[++i]);
      else if (!strcmp(argv[i], "-iters"))
		  iters = atoi(argv[++i]);
	  else if (!strcmp(argv[i], "-nonmeta"))
	  {
		  fprintf(stderr,"Meta learning is turned off, all models have own feature selection.\n");
		  meta = 0;
	  }
      else
	{
	  fprintf(stderr,"Usage: %s -input <fname> -task <fname> \n",argv[0]);
	  fprintf(stderr,"    -ntrain <int> -ext <fname> -iters <int> -lambda <fname> \n");
	  fprintf(stderr,"    -sigma <double> -p <double> -c <double> -nonmeta \n");
	  fprintf(stderr,"Multitask SVM Feature Selection for Linear Classification.\n");
	  exit (2);
	}
    }

  strcat(lambdas_fname,extensionFile);
  strcat(model_fname,extensionFile);
  strcat(switcher_fname,extensionFile);
  strcat(outputs_fname,extensionFile);
}


double oldObjective, newObjective, oldObjectiveY;
int    lambdaIndex = -100;
int    oldDir      = -100;
int    dirM        = 0;
int    dirN        = 0;


double
costIF(double lscale)
{
  double changeObjective = -oldObjective;
  int    d=0;
  double lnew,lold,WNEW,TWNEW;

  lold  = oldLambdas->d2[dirM][dirN];
  lnew  = lold + lscale;

  // logZgamma
  changeObjective += logZgammaT(c,lold);
  changeObjective -= logZgammaT(c,lnew);


  // logZb
  changeObjective -= 0.5*sigma*SQRD(lZb1->d[dirM]);
  changeObjective += 0.5*sigma*SQRD(lZb1->d[dirM] - lold*Y->d2[dirM][dirN] + lnew*Y->d2[dirM][dirN]);
  
  // logZetheta
  if (meta)
  {
	  for (d=0; d<D; d++)
	  { 
		  changeObjective -= lZtheta->d[d];
		  TWNEW  = TW->d[d]-SQRD(W->d2[dirM][d]);
		  WNEW   = W->d2[dirM][d] + (lnew-lold)*Y->d2[dirM][dirN]*X->d2[dirN][d];
		  TWNEW += SQRD(WNEW);
		  changeObjective += 0.5*TWNEW + log(p + (1.0-p)*texp(-0.5*TWNEW));
	  }
  }
  else
  {
	  for (d=0; d<D; d++)
	  { 
		  WNEW   = W->d2[dirM][d];
		  changeObjective -= 0.5*SQRD(WNEW) + log(p + (1.0-p)*texp(-0.5*SQRD(WNEW)));
		  WNEW   = W->d2[dirM][d] + (lnew-lold)*Y->d2[dirM][dirN]*X->d2[dirN][d];
		  changeObjective += 0.5*SQRD(WNEW) + log(p + (1.0-p)*texp(-0.5*SQRD(WNEW)));
	  }
  }

  return(changeObjective);
}


#define ITMAX 100
#define CGOLD 0.3819660
#define ZEPS 1.0e-10
#define SHFT(a,b,c,d) (a)=(b);(b)=(c);(c)=(d);
#define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a))

// This will minimize the function in the interval ax to cx

double
brent(double ax,double bx,double cx,double (*f)(double),double tol,double *xmin)
{
  int iter;
  double a,b,d,etemp,fu,fv,fw,fx,p,q,r,tol1,tol2,u,v,w,x,xm;
  double e=0.0;
  
  a=(ax < cx ? ax : cx);
  b=(ax > cx ? ax : cx);
  x=w=v=bx;
  fw=fv=fx=(*f)(x);
  for (iter=1;iter<=ITMAX;iter++) {
    xm=0.5*(a+b);
    tol2=2.0*(tol1=tol*fabs(x)+ZEPS);
    if (fabs(x-xm) <= (tol2-0.5*(b-a))) {
      *xmin=x;
      return fx;
    }
    if (fabs(e) > tol1) {
      r=(x-w)*(fx-fv);
      q=(x-v)*(fx-fw);
      p=(x-v)*q-(x-w)*r;
      q=2.0*(q-r);
      if (q > 0.0) p = -p;
      q=fabs(q);
      etemp=e;
      e=d;
      if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x))
	d=CGOLD*(e=(x >= xm ? a-x : b-x));
      else {
	d=p/q;
	u=x+d;
	if (u-a < tol2 || b-u < tol2)
	  d=SIGN(tol1,xm-x);
      }
    } else {
      d=CGOLD*(e=(x >= xm ? a-x : b-x));
    }
    u=(fabs(d) >= tol1 ? x+d : x+SIGN(tol1,d));
    fu=(*f)(u);
    if (fu <= fx) {
      if (u >= x) a=x; else b=x;
      SHFT(v,w,x,u)
	SHFT(fv,fw,fx,fu)
	} else {
	  if (u < x) a=u; else b=u;
	  if (fu <= fw || w == x) {
	    v=w;
	    w=u;
	    fv=fw;
	    fw=fu;
	  } else if (fu <= fv || v == x || v == w) {
	    v=u;
	    fv=fu;
	  }
	}
  }
  *xmin=x;
  return fx;
}
#undef ITMAX
#undef CGOLD
#undef ZEPS
#undef SHFT
#undef SIGN


double ACC_1  = 0.0;
double ACC_t  = 0.0;
double ACC_tt = 0.0;
double ACC_l  = 0.0;
double ACC_lt = 0.0;
double PAR_M  = 0.0;
double PAR_B  = 0.0;



void
main(int argc, char *argv[]) 
{
  int     i,j,k;
  FILE    *fp;
  char    line[MAXLINE+1];
  char    b2[50];
  int     stop = -1;
  int     dump = -1;
  int     NN;

  // Get Command Line Arguments
  parse_args(argc,argv);

  // Open the taskFile to Determine number of tasks
  if ((fp = fopen(taskFile,"r"))==NULL)
  {
      fprintf(stderr,"%s can't open %s for input.\n",argv[0],taskFile);
      exit(0);
  }
  else
  {
	  fgets(line,MAXLINE,fp);
	  istrstream ist(line,strlen(line));
	  M = 0;
	  while (ist>>b2) M++;
	  fclose(fp);
  }
  
  
  // Open the inputFile to Determine its Dimensions
  if ((fp = fopen(inputFile,"r"))==NULL)
  {
      fprintf(stderr,"%s can't open %s for input.\n",argv[0],inputFile);
      exit(0);
  }
  else
  {
	  fgets(line,MAXLINE,fp);
	  istrstream ist(line,strlen(line));
	  D = 0;
	  while (ist>>b2) D++;
	  NN = 1;
	  while (fgets(line, MAXLINE, fp) != NULL) 
		  if (strlen(line)>2) NN++;
		  fclose(fp);
  }

  if (ntrain>0) N = ntrain;
  else N = NN;

  // Allocate the Right Size Arrays
  X             = MatrixCreate(N,D);
  HMMv          = MatrixCreate(N*M,20);
  HMMi          = MatrixCreate(N*M,20);
  HMMt          = MatrixCreate(N*M,20);
  VectorSet((Vector) HMMi,-1.0);
  VectorSet((Vector) HMMv,-1.0);
  VectorSet((Vector) HMMt,-1.0);
  Xt            = MatrixCreate(NN-N,D);  // contains test data...
  Y             = MatrixCreate(M,N);
  Yt            = MatrixCreate(M,NN-N);
  lambdas       = MatrixCreate(M,N);
  oldLambdas    = MatrixCreate(M,N);
  lZtheta       = VectorCreate(D);
  lZgamma       = MatrixCreate(M,N);
  W             = MatrixCreate(M,D);
  TW            = VectorCreate(D);
  linear        = MatrixCreate(M,D);
  switcher      = MatrixCreate(M,D);
  lZb1          = VectorCreate(M);
  bias          = VectorCreate(M);
  Vector extent = VectorCreate(2);
  

  // Read in the inputFile and Load it into X
  fp = fopen(inputFile,"r");
  for (i=0; i<N; i++)
  {
      fgets(line, MAXLINE, fp);
      istrstream ist(line,strlen(line));
      for (j=0; j<D; j++)
	  {
		  ist>>b2;
		  X->d2[i][j] = atof(b2);
	  }
  } 
  for (i=0; i<(NN-N); i++)
  {
      fgets(line, MAXLINE, fp);
      istrstream ist(line,strlen(line));
      for (j=0; j<D; j++)
	  {
		  ist>>b2;
		  Xt->d2[i][j] = atof(b2);
	  };
  }
  fclose(fp);

  // Read in the taskFile and Load it into Y
  fp = fopen(taskFile,"r");
  for (i=0; i<N; i++)
  {
      fgets(line, MAXLINE, fp);
      istrstream ist(line,strlen(line));
      for (j=0; j<M; j++)
	  {
		  ist>>b2;
		  Y->d2[j][i] = atof(b2);
	  }
  } 
  for (i=0; i<(NN-N); i++)
  {
      fgets(line, MAXLINE, fp);
      istrstream ist(line,strlen(line));
      for (j=0; j<M; j++)
	  {
		  ist>>b2;
		  Yt->d2[j][i] = atof(b2);
	  };
  }
  fclose(fp);
  
  // Must Learn Lambdas
  VectorSet((Vector) lambdas,0.0001);

  if (strlen(lambdaFile)>2)
  {
      fp = fopen(lambdaFile,"r");
      for (i=0; i<M; i++)
	  {
		  for (j=0; j<N; j++)
		  {
			  fgets(line,MAXLINE,fp);
			  lambdas->d2[i][j] = atof(line);
			  if (lambdas->d2[i][j]<0.0) lambdas->d2[i][j] = 0.0;
			  if (lambdas->d2[i][j]>=(c-TINY)) lambdas->d2[i][j] = c-TINY;
		  }
	  }
      fclose(fp);
  }

  VectorMove((Vector) oldLambdas,(Vector) lambdas);
  VectorSet((Vector) W,0.0);
  VectorSet(TW, 0.0);
  VectorSet(lZtheta,0.0);
  VectorSet((Vector) lZgamma,0.0);
  VectorSet(lZb1,0.0);
  lZb = 0.0;
  lZt = 0.0;





  // Setup
  oldObjective     = -logPartition(lambdas);
  fprintf(stderr,"Initial Objective = %e p=%e\n", oldObjective,p);
  int    direction = 0;
  int    itr       = 1;
  double tolerance = 1e-10;
  double increase  = 0.0;
  int    past      = HMMi->cols;
  double lastCheck = -2.0*fabs(oldObjective);

  if (iters < 0) iters = 64000000;
  


  while (stop<0)
    { 
      // Grab from Keyboard
      if (_kbhit()>0)
	  {
		  Key = _getch();
		  if (Key=='q') stop = 1;
		  if (Key=='s') dump = 1;
	  }
      

      itr++;

	  
      if (itr>iters) stop = 1;



      // Pick a direction to optimize
      double selection = texp(- ((double) itr)/((double) N*M)) + 0.01;
      if ((DRAND>0.2) && (oldDir>=0))
	  {
		  // pick an HMM direction
		  double accV    =  0.0;
		  double sumV    =  0.0;
		  double maxV    = -1e30;
		  int    maxJ    =  0;
		  double value   =  0.0;
		  double decay   =  0.0;
		  double curprob =  0.0;
		  double totprob =  0.0;
		  int    sample  = -100;
		  for (j=0; j<past; j++) 
			  if ((HMMi->d2[oldDir][j]>=0) && (HMMi->d2[oldDir][j]!=oldDir))
			  {
				  value  = (HMMv->d2[oldDir][j] - HMMt->d2[oldDir][j]*PAR_M - PAR_B);
				  if (value>maxV)
				  {
					  maxV = value;
					  maxJ = j;
				  }
				  sumV  += value;
				  accV  += 1.0;
			  }
			  if (accV>0.0)
			  {
				  decay = accV / (maxV*accV - sumV);
				  for (j=0; j<past; j++) 
					  if ((HMMi->d2[oldDir][j]>=0) && (HMMi->d2[oldDir][j]!=oldDir))
					  {
						  value    = (HMMv->d2[oldDir][j] - HMMt->d2[oldDir][j]*PAR_M - PAR_B);
						  totprob += decay*texp(-decay*(maxV-value));
					  }
					  totprob *= DRAND;
					  for (j=0; j<past; j++) 
						  if ((HMMi->d2[oldDir][j]>=0) && (HMMi->d2[oldDir][j]!=oldDir))
						  {
							  value    = (HMMv->d2[oldDir][j] - HMMt->d2[oldDir][j]*PAR_M - PAR_B);
							  curprob += decay*texp(-decay*(maxV-value));
							  if ((sample<0) && (curprob>=totprob)) sample = j;
						  }
						  direction = (int) HMMi->d2[oldDir][sample];
						  direction = (int) HMMi->d2[oldDir][maxJ];
						  if (direction>=(N*M)) direction = N*M-1;
						  if (direction<0) direction = 0;
			  }
			  else
			  {
				  // pick a random direction
				  direction = (int) (DRAND*(((double) N*M)+1.0));
				  if (direction>=(N*M)) direction = N*M-1;
				  if (direction<0) direction = 0;
			  }
	  }
      else
	  {
		  // pick a random direction
		  direction = (int) (DRAND*(((double) N*M)+1.0));
		  if (direction>=(N*M)) direction = N*M-1;
		  if (direction<0) direction = 0;
	  }
	  



	  // Convert direction into dirN and dirM
	  dirM = direction/(N);
	  dirN = direction-N*dirM;

      // Do line search
      double lnow;
      lnow = oldLambdas->d2[dirM][dirN];
      lambdaIndex = direction;
      double minf,xmin;
      minf = brent(-lnow+VTINY,0.0,c-lnow-VTINY, &costIF, tolerance, &xmin);



	  // fprintf(stderr,"%e %e %e %e lnow=%e\n",ax,bx,cx,xmin,lnow);

      // Update cost function and vector of lambdas
      increase = -oldObjective;
	  oldObjective = -logPartitionFast(dirN,dirM,oldLambdas->d2[dirM][dirN]+xmin,oldLambdas->d2[dirM][dirN]);
	  oldLambdas->d2[dirM][dirN] += xmin;
	  if (oldLambdas->d2[dirM][dirN]<0.0) fprintf(stderr,"bad dirm=%d dirn=%d\n",dirM,dirN); // oldLambdas->d2[dirM][dirN]=0.0;
	  lambdas->d2[dirM][dirN]     = oldLambdas->d2[dirM][dirN];
	  
	  // double slowobj = -logPartition(lambdas,lambdasP);
	  // fprintf(stderr,"fast=%e slow=%e diff=%e\n",oldObjective,slowobj, oldObjective-slowobj);

      increase += oldObjective;
      if ((itr%(N+100))==0)
	  {
		  printf("%e %d\n",oldObjective,itr); 
		  fflush(stdout); 
	  }
      if ((itr%(10*N))==0)
	  {
		  	if (((oldObjective-lastCheck)<=(0.00000002)*fabs(oldObjective)))
			{
				if (negc<0)
				{
					if (c>fabs(negc))
						stop = 1;
					else
					{
						c = c*1.2247;
						printf("JUST SWITCHED TO c=%e\n",c); 
						dump = 1;
					}
				}
				else stop = 1;
			}
			lastCheck = oldObjective;
	  }
      

      // Add to the history model the current data point. Replace the least valuable previous one.
      if (oldDir>=0)
	  {
		  int worst = -5;
		  double t;
		  double value = 0.0;
		  double lowestvalue = 1e30;
		  for (j=0; j<past; j++) if (HMMi->d2[oldDir][j]==lambdaIndex) worst = j;
		  if (worst<0) 
		  {
			  for (j=0; j<past; j++) if (HMMi->d2[oldDir][j]<0) worst = j;
		  }
		  if (worst<0)
		  {
			  for (j=0; j<past; j++)
			  {
				  value = HMMv->d2[oldDir][j] - HMMt->d2[oldDir][j]*PAR_M - PAR_B;
				  if (value<lowestvalue)
				  {
					  worst       = j;
					  lowestvalue = value;
				  }
			  }
		  }
		  if (worst<0) worst = 0;
		  HMMi->d2[oldDir][worst] = lambdaIndex;
		  HMMv->d2[oldDir][worst] = log(increase+1e-10);
		  HMMt->d2[oldDir][worst] = itr;
		  t = itr;
		  ACC_1  = t;
		  ACC_t  = ACC_t + t;
		  ACC_tt = ACC_tt + t*t;
		  ACC_l  = ACC_l + log(increase+1e-10);
		  ACC_lt = ACC_lt + log(increase+1e-10)*t;
		  PAR_M  = (ACC_lt - ACC_t*ACC_l/ACC_1) / (ACC_tt - ACC_t*ACC_t/ACC_1);
		  PAR_B  = (ACC_l - PAR_M*ACC_t) / ACC_1;
	  }
      oldDir = lambdaIndex;





      // Check if any output needs to be generated
      if ((stop==1) || (dump==1))
	  { 
		  // Save the lambdas
		  fp = fopen(lambdas_fname,"w");
		  MatrixPrint(fp,lambdas);
		  fclose(fp);
		  
		  // Compute the actual final linear model and bias
		  newObjective = -logPartition(lambdas);
		  FILE *fplog = fopen("log","a");
		  for (k=0; k<argc; k++) fprintf(fplog,"%s ",argv[k]);
		  fprintf(fplog,"\n");
		  fprintf(stderr,"J=%e i=%d ",newObjective,itr);
		  getModel(lambdas,linear,bias);
		  fp = fopen(model_fname,"w");
		  MatrixPrint(fp,linear);
		  VectorPrint(fp,bias);
		  fprintf(fp,"\n");
		  fclose(fp);
		  fp = fopen(switcher_fname,"w");
		  MatrixPrint(fp,switcher);
		  fclose(fp);
		  fprintf(fp,"\n");
		  
		  // Recompute the newoutputs and the prediction error
		  int    m;
		  double val;
		  double err1 = 0.0;
		  double err2 = 0.0; 
		  fp = fopen(outputs_fname,"w");
		  for (k=0; k<N; k++)
		  {
			  for (m=0; m<M; m++)
			  {
				  val = bias->d[m];
				  for (j=0; j<D; j++) val += linear->d2[m][j]*X->d2[k][j];
				  fprintf(fp,"%e ",val);
				  if (val*Y->d2[m][k]<0.0) err1++;
			  }
			  fprintf(fp,"\n");
		  }
		  fprintf(fplog,"%d %e %e %e ",meta,c,p,err1);
		  fprintf(stderr,"c=%e err1=%e ",c,err1);
		  for (k=0; k<(NN-N); k++)
		  {
			  for (m=0; m<M; m++)
			  {
				  val = bias->d[m];
				  for (j=0; j<D; j++) val += linear->d2[m][j]*Xt->d2[k][j];
				  fprintf(fp,"%e ",val);
				  if (val*Yt->d2[m][k]<0.0) err2++;
			  }
			  fprintf(fp,"\n");
		  }
		  fprintf(fplog,"%e \n",err2);
		  fprintf(stderr,"err2=%e \n",err2);

		  fclose(fplog);
		  fclose(fp);
		  
		  dump = -1;
	  }

    }

	  

  // Free Arrays
  MatrixFree(X);
  MatrixFree(HMMv);
  MatrixFree(HMMi);
  MatrixFree(HMMt);
  MatrixFree(Xt);
  MatrixFree(Y);
  MatrixFree(Yt);
  MatrixFree(lambdas);
  MatrixFree(oldLambdas);
  VectorFree(lZtheta);
  MatrixFree(lZgamma);
  MatrixFree(W);
  VectorFree(TW);
  MatrixFree(linear);
  MatrixFree(switcher);
  VectorFree(lZb1);
  VectorFree(bias);
  VectorFree(extent);
}


