#include "xbasic_types.h"
#include "xio.h"
#include "coeff2048.c"  // coefficients must be calculated for FFT size

#define FFT_SIZE 2048

extern Xuint32 coeff[];


/****************************************************************************/
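// nonswaps[] lists the indices that bit reversal leaves in place (each one is
//            its own bit-reversed partner, so it never gets swapped).
//            Index 0 is a header, not a data index: it holds the exclusive
//            upper bound for walking the table, i.e. the in-place indices
//            are entries 1 .. nonswaps[0]-1.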


/*
const Xuint16 nonswaps256[] =  
   { 17, 
     1,   49,  73,  121, 133, 181, 205, 253, 259, 307, 331, 379,
     391, 439, 463, 511, 513 };

const Xuint16 nonswaps1024[] = 
{ 33, 
  1, 97, 145, 241, 265, 361, 409, 505, 517, 613, 661, 757, 781, 877, 925, 1021, 
  1027, 1123, 1171, 1267, 1291, 1387, 1435, 1531, 1543, 1639, 1687, 1783, 
  1807, 1903, 1951, 2047, 2049 };
*/

// Self-paired (never swapped) bit-reversal indices for a 2048-point FFT.
// Element 0 is a header holding the exclusive upper bound for iterating the
// table; elements 1..64 are the odd data indices that map onto themselves.
// The final element (4097 == 2*2048 + 1) lies one pair past the last data
// index.  NOTE(review): it looks like an end marker -- confirm with callers.
const Xuint16 nonswaps[] = {
  65,

  1,    65,   161,  225,  273,  337,  433,  497,
  521,  585,  681,  745,  793,  857,  953,  1017,
  1029, 1093, 1189, 1253, 1301, 1365, 1461, 1525,
  1549, 1613, 1709, 1773, 1821, 1885, 1981, 2045,
  2051, 2115, 2211, 2275, 2323, 2387, 2483, 2547,
  2571, 2635, 2731, 2795, 2843, 2907, 3003, 3067,
  3079, 3143, 3239, 3303, 3351, 3415, 3511, 3575,
  3599, 3663, 3759, 3823, 3871, 3935, 4031, 4095,

  4097
};
/****************************************************************************/


/**
 * Splits an interleaved L/R sample buffer into two FFT-ready arrays and puts
 * the samples into bit-reversed order in the same pass.
 *
 * Both arrays use 1-based indexing and are touched at indices
 * 1 .. 2*FFT_SIZE only; index 0 is never accessed (same convention as the
 * FFT routine).
 *
 * On entry, dataL holds the buffer data in the form 0xLLLLRRRR: every odd
 * index k holds a left sample and k+1 holds the matching right sample.
 *
 * On return, for every odd index k:
 *   - dataL[k] holds the left sample and dataR[k] the right sample that
 *     belong at position k after bit reversal (the real parts);
 *   - dataL[k+1] and dataR[k+1] (the imaginary, even slots) are 0x0000.
 *
 * Indices that are their own bit-reversed partner are detected directly
 * (j == i) instead of being looked up in a precomputed table, so the routine
 * stays correct if FFT_SIZE is changed.
 *
 * dataL and dataR are assumed not to overlap.
 *
 * @param dataL  in: interleaved L/R samples; out: left channel, FFT-ready
 * @param dataR  out: right channel, FFT-ready
 */
void bitReverseAndLRSeparate( Xint16 *dataL, Xint16 *dataR ) {
  const int n = FFT_SIZE << 1;  // number of 16-bit slots per array
  int i, j, m;
  Xint16 tempL, tempR;

  j = 1;  // j walks the bit-reversed counterpart of i

  for ( i=1; i < n; i+=2 ) {
    if ( j > i ) {
      // First visit of the pair (i, j): exchange the two sample pairs,
      // routing each R sample into dataR on the way.
      tempL = dataL[j];
      tempR = dataL[j+1];

      dataL[j] = dataL[i];
      dataR[j] = dataL[i+1];

      dataL[i] = tempL;
      dataR[i] = tempR;

      dataL[i+1] = 0x0000;
      dataL[j+1] = 0x0000;
      dataR[i+1] = 0x0000;
      dataR[j+1] = 0x0000;

    } else if ( j == i ) {
      // Self-paired index: nothing to swap, but the R sample still has to be
      // moved out of dataL and the imaginary slots cleared.
      dataR[i] = dataL[i+1];

      dataL[i+1] = 0x0000;
      dataR[i+1] = 0x0000;
    }
    // j < i: this pair was already handled when the loop was at index j.

    // Advance j to the bit-reversed counterpart of the next i.
    m = FFT_SIZE;
    while ( (m >= 2) && (j > m) ) {
      j -= m;
      m >>= 1;
    }

    j += m;
  }
}


// Runs the in-place FFT butterfly passes (Danielson-Lanczos formula) over
// FFT_SIZE complex points stored as 16-bit values.
//
// data uses 1-based indexing: for every odd k in 1 .. 2*FFT_SIZE-1, data[k]
// is the real part and data[k+1] the imaginary part of one point.  The input
// is expected to be in bit-reversed order already.  Indices 1 .. 2*FFT_SIZE
// are read and written; this function WILL NEVER access array index 0
// (very important - no seg faults).
//
// The complex multiply of each butterfly is done by the multiplier reached
// through XIo_Out32/XIo_In32: the point and its coefficient are written, then
// the product is read back.  Coefficients are taken from coeff[] in order,
// one per butterfly group, so coeff[0 .. FFT_SIZE-2] are consumed in total.
//
// Results are stored back as 16-bit values; no overflow handling is done here.
void doFFT( Xint16 *data ) {
  // Multiplier register addresses.
  enum {
    FFT_MULT_RESULT_REG  = 0x0FEF0000,  // read: product (real, imag)
    FFT_MULT_OPERAND_REG = 0x0FEF0008,  // write: point (real, imag)
    FFT_MULT_COEFF_REG   = 0x0FEF000C   // write: coefficient
  };

  // One complex point viewed either as two 16-bit halves or as the single
  // 32-bit word exchanged with the multiplier.  Going through a union keeps
  // the exact in-memory byte layout of data[j], data[j+1] without casting
  // between pointer types (no aliasing or alignment assumptions on data).
  // Assumes Xuint32 is exactly twice the size of Xint16.
  union complex_word {
    Xuint32 word;
    Xint16  half[2];  // [0] = real part, [1] = imaginary part
  };

  Xint32 istep, mmax, j, i, m, n, coeff_ofs;
  union complex_word operand, product;

  coeff_ofs = 0;

  n = FFT_SIZE << 1;
  mmax = 2;

  while ( n > mmax ) { // this, the outer loop, gets executed log2(FFT_SIZE) times.
    istep = mmax << 1;

    for ( m=1; m<mmax; m+=2 ) {
      // Every butterfly in this group shares coeff[coeff_ofs].
      for ( i=m; i<=n; i+=istep ) {
        j = i + mmax;

        // And now, the guts of the whole project, the butterfly operation:
        // (aka Danielson-Lanczos formula)

        operand.half[0] = data[j];
        operand.half[1] = data[j+1];

        // write real and imag values of J to multiplier
        XIo_Out32( FFT_MULT_OPERAND_REG, operand.word );
        // write coefficients to multiplier
        XIo_Out32( FFT_MULT_COEFF_REG, coeff[ coeff_ofs ] );

        // read back the complex product
        product.word = XIo_In32( FFT_MULT_RESULT_REG );

        data[j]   = data[i]   - product.half[0];
        data[j+1] = data[i+1] - product.half[1];
        data[i]   += product.half[0];
        data[i+1] += product.half[1];
      }
      coeff_ofs++;
    }
    mmax = istep;
  }
}

