/***************************************************************************
 *		  Copyright (C) 1994  Charles P. Peterson                  *
 *	     4007 Enchanted Sun, San Antonio, Texas 78244-1254             *
 *              Email: Charles_P_Peterson@fcircus.sat.tx.us                *
 *                                                                         *
 *		  This is free software with NO WARRANTY.                  *
 *	      See gfft.c, or run program itself, for details.              *
 *		      Support is available for a fee.                      *
 ***************************************************************************
 *
 * Program:     gfft--General FFT analysis
 * File:        rfft.c
 * Purpose:     fast Fourier analysis function with real input array
 * Author:      Charles Peterson (CPP)
 * History:     12-July-1993 CPP; Created.
 *              8-Feb-95 CPP (1.31); Progress Requester
 * Comment:
 *   Inspired by the algorithm expressed in realft, page 417-418 in
 *   Numerical Recipes in C  (William H. Press, Brian P. Flannery, Saul A.
 *   Teukolsky, William T. Vetterling.  Cambridge University Press, 1988.)
 *   This, however, is an original implementation without any copied code.
 *
 *   To fully appreciate this routine, read cfft.c, and the book, first.
 *   cfft.c is where the transforming is done; here, we just use some
 *   tricks to transform twice as many datapoints in (almost) the same
 *   amount of time, because we are starting with real (not complex) data.
 *   We start by pretending that the odd data points are the imaginary
 *   values for the even data points (0, 2, ...).  Then we use a
 *   symmetry modified version of the Danielson-Lanczos equation to
 *   unfold the data into the positive half of the full transform.
 *
 *   F[k] = 1/2 * (H[k] + conj (H[N/2-k])) - 
 *          i/2 * (H[k] + conj (H[N/2-k])) * e ** (2 * PI * i * k / N)
 *   e ** (i * theta) = cos (theta) + i * sin (theta)
 *
 *   The negative half of the transform is simply related to the positive
 *   half by the equation F[n] = conj (F[-n]) (because h(t) is real).
 *
 *   LEGEND
 *   = here denotes equality
 *   * denotes multiply, ** denotes power (familiar & easy to read; sorry)
 *   cconj is the complex conjugate: conj( {x, y} ) = {x, -y}
 *   i = sqr (-1)
 *   e and PI are the named transcendental numbers
 *   cos and sin are the named trigonometric functions
 *   N is the total number of samples input; k is a number 0..N-1
 *   F is the output complex transform; H is the pseudo-transform returned
 *       by sending cfft the packed samples
 *
 *   Single precision floating point, except for trigonometric values
 *   which are computed in double precision.
 *
 *   The values F[0] and F[N/2] are real and independent.  In order to
 *   pack all data into the original space, Press, et. al., chose to
 *   put the value for F[N/2] into the space for the imaginary part of
 *   F[0].  I didn't like this idea at first, but as the higher level
 *   procedures are written to be general, creating and maintaining an
 *   array 1 unit larger for the 'real' case gets to be quite complicated.
 *   And, it's so nice when input and output arrays are exactly the same
 *   size always.  (I'm breathing easier already.)
 *
 *   (-: CRACKPOT OPINION AND RATIONALIZATION MODE ON
 *
 *   Heck, I shouldn't be programming stuff like this in the psuedo-
 *   assembler known as C.  I want a nice high level OO language with
 *   dynamic arrays and such.  On the other hand, I hear this little voice
 *   saying I'm a fool not to use an assembler--particularly one tailored
 *   to a particular DSP.  I argue critical parts can always be recoded,
 *   and right now I want to get the algorithms and structure correct, and
 *   make this a shining example of readable, understandable, and
 *   learnable--yes, literate--code so that even if people don't use it,
 *   some may learn from it.  Unfortunately, we've not yet been delivered
 *   to the Promised Land of perfect languages, with high and low levels
 *   seemlessly integrated and unified by intuitive concepts and syntax
 *   expanding to infinity.  Hmmn, maybe God didn't want that to happen
 *   yet.  Anyway, right now C is the closest thing to UNIversal, and a
 *   fair compromise language too.  And compromises one has to keep on
 *   making in C as well--they go with the turf.  Dammit.
 *
 *   CRACKPOT OPINION AND RATIONALIZATION MODE OFF :-)
 *
 *   Another catch...the input data is real, and the output is (mostly)
 *   complex.  And, since we don't want to allocate another array and copy
 *   data around, what do we do?  Well, Press, et. al., simply always use
 *   float arrays and use a 'convention' as to how the complex data is
 *   stored therein.  (They did that for four1--the equivalent of my
 *   cfft--as well, which I do consider Blasphemy, since all data there is
 *   complex.)  I use Complex_float arrays there and here as well, so at
 *   least I am correct 3 out of 4 times.  Not only that, but it makes the
 *   code much more traceable to the algorithms, I believe.  (Check it out
 *   for yourself.  I went to a lot of trouble writing cfft.c and rfft.c,
 *   so I hope somebody reads them, even if they don't bother to read the
 *   comments.)  But how do you call this function, then?  Well, you could
 *   allocate a Complex_float data array to begin with, and pack the real
 *   input data into it initially.  Or, you could try declaring rfft()
 *   differently in the caller--using a float data array for i/o.  (This
 *   passed my current SAS 6.0 'ANSI' compiler and linker--without even a
 *   warning--until I put a prototype for realft in complex.h.)  Or, you
 *   could cast the data array/pointer to Complex_float at the point of
 *   call in the caller.  That worked for SAS 6.0 as well, with an
 *   appropriate warning.  I like that approach; I hope they don't change
 *   the compiler to make it not work, and I hope GCC and other compilers
 *   are equally forgiving.  Of course, I take no responsibility for any of
 *   this, or anything.  I know nothing, nuuthing.
 *
 */

#include <math.h>

#ifdef _AMIGA
#ifdef _M68881
#include <m68881.h>
#endif
#endif

#include "complex.h"

void rfft (Complex_float datac[], long n, int isign)
{
/* ARGUMENTS:
 *
 * datac       input/output data array
 * -----       transformed according to isign (see below)
 *
 *             * (For isign = 1) Input array is a real array packed (?) into
 *             * the complex array.  For i = 0  to  n, a hypothetical input
 *             * array dataf[0..n] is packed as follows:
 *             * datac[i/2].real = dataf[i]
 *             * datac[i/2].imaginary = dataf[i+1]
 *             * Or, if you have a reasonable compiler, you might simply
 *             * be able to cast the real (float) array as a the required
 *             * Complex_float array and be done with it.  (Please check
 *             * that it is working properly if you do that.)
 *
 *             * Output array is the positive frequency half of the
 *             * full transform, i.e. F[0]..F[N/2].  It is complex, so
 *             * the declared type Complex_float applies.
 *
 *             *** NOTE: The value for F[0] is real (not complex) and is
 *             *** returned in datac[0].real.  The value for F[N/2] is
 *             *** real (not complex) and is returned in datac[0].imaginary.
 *
 *             * (For isign = -1, the roles of input and output above are
 *             * reversed.)
 *
 *  n          number of real data elements packed in input
 * ---         (2 less than half the number of complex numbers to output)
 *             *** n MUST BE A POWER OF 2  ***
 *             *** THIS IS NOT CHECKED FOR ***
 *
 * isign	if 1, datac is replaced with its discrete Fourier transform
 * -----        if -1, datac is replaced with n times its INVERSE
 *              discrete Fourier transform
 *
 */

    Complex_double w;
    Complex_double wk;
    Complex_double wktemp;
    Complex_float wkfloat;
    double theta;
    long k;

    long halfn = n / 2;
    long halfhalfn = halfn / 2;
    float shalf = 0.5;

    if (n < 2)
    {
	return;  /* Nothing like satisfying the boundary condition.    */
	         /* This is the correct result too (as long as you     */
	         /* i/o only one value: datac[0].real), and avoids a   */
	         /* nasty divide-by-zero and use of uninitialized data */
    }

    theta = PI / (double) (isign * halfn);

    if (isign == 1)
    {
	cfft (datac, halfn, isign);
    }
    else
    {
    /*
     * Inverse transforming is done near end of routine.
     * For now, just reset this one factor which differs between
     * normal and inverse cases
     * (set explicitly to encourage multiply-by-half optimizations).
     */
	shalf = (-0.5);
    }
/*
 * We're skipping k=0 and so we get right down to trigonometric values
 */
    wkfloat.real = (float) (wk.real = w.real = cos (theta));

    wkfloat.imaginary = (float) (wk.imaginary = w.imaginary = sin (theta));

    for (k = 1; k < halfhalfn; k++)
    {
	int n2k = halfn - k;
	Complex_float hk;
	Complex_float hn2k;
    /*
     * First, we peel the two transforms (for k and N/2-k) apart
     */
        hk.real = 0.5 * (datac[k].real + datac[n2k].real);
	hk.imaginary = 0.5 * (datac[k].imaginary - datac[n2k].imaginary);
	hn2k.real = shalf * (datac[k].imaginary + datac[n2k].imaginary);
	hn2k.imaginary = (-shalf) * (datac[k].real - datac[n2k].real);
    /*
     * Then, we recombine them using the modified D/L equation
     */
        datac[k].real = hk.real + (hn2k.real * wkfloat.real) -
	  (hn2k.imaginary * wkfloat.imaginary);
	datac[k].imaginary = hk.imaginary + (hn2k.imaginary * 
					     wkfloat.real) +
	  (hn2k.real * wkfloat.imaginary);

	datac[n2k].real = hk.real - (hn2k.real * wkfloat.real) +
	  (hn2k.imaginary * wkfloat.imaginary);
	datac[n2k].imaginary = (-hk.imaginary) +
	  (hn2k.imaginary * wkfloat.real) + (hn2k.real * wkfloat.imaginary);
    /*
     * Now, compute the next power of e
     */
        C_MULT (wk, w, wktemp);  /* i/o must not overlap */
	wkfloat.real = (float) (wk.real = wktemp.real);
	wkfloat.imaginary = (float) (wk.imaginary = wktemp.imaginary);
    }
/*
 * Now we compute and store the values for 0 and N/2
 */
    if (isign == 1)
    {
	float tempr = datac[0].real;
	datac[0].real += datac[0].imaginary;
	datac[0].imaginary = tempr - datac[0].imaginary;
    }
    else
    {
    /*
     * Now, here's the completion of the inverse part
     * You'll have to take this part on faith (I am)
     */
	float tempr = datac[0].real;
	datac[0].real = 0.5 * (tempr + datac[0].imaginary);
	datac[0].imaginary = 0.5 * (tempr - datac[0].imaginary);

	cfft (datac, halfn, isign);
    }
}

/*
 * The number of inner loops would be N log2 N where N is # of bins
 * However, thanks to the above function, the number is (N/2) log N
 * That's why fft_inner_loops is defined here
 * 
 */

long fft_inner_loops (long number_bins)
{
    long log2;
    long alog;
    long loopcount;

    for (alog = 1, log2 = 0; 
	 alog < number_bins; 
	 log2++, alog *= 2);

    loopcount = (number_bins / 2) * log2;

    return loopcount;
}