//---------------------------------------------------------------------------
// File  :  C:\cbproj\SoundUtl\SoundMaths.c
// Date  :  2019-02-11 (ISO 8601,  YYYY-MM-DD)
// Author:  Wolfgang Buescher  (DL4YHF)
//
// Description:
//     Some mathematical subroutines which are frequently used
//     in DL4YHF's "Sound Utilities".
//
// Revision history (YYYY-MM-DD):
//
//   2019-02-11  Added some stone-age ADPCM functions that OpenWebRX uses
//               to compress/decompress audio samples, but also spectra
//               for the remote 'waterfall' display.
//
//   2017-12-18  Tried to use Julien Pommier's PFFFT (Pretty Fast FFT)
//               to speed up SndMat_CalcComplexFft() on systems where
//               SIMD instructions are available (e.g. "SSE" on Pentium M).
//               But since Borland C++Builder V6 doesn't support the
//               necessary intrinsics (as GCC's "xmmintrin.h"), the only chance
//               would be obfuscated inline HEX SEQUENCES (yucc), or a DLL .
//               Decided to compile Julien's PFFFT.C into a DLL with MinGW,
//               result in subdirectory C:\cbproj\SoundUtl\PFFFT_DLL .
//               SoundMaths.c only tries to load that DLL *dynamically*
//               if SWI_SOUNDUTL_USE_PFFFT_DLL is defined (in SWITCHES.H) .
//
//   2011-09-10  SoundMaths.c now also used for the simple spectrum display
//               in some of the 'Audio I/O Libraries' (DLLs),
//               for example c:\cbproj\AudioIO\aio2winamp.c .
//
//   2008-08-08  Added _matherr() here, because the C++ exception handlers
//               (with try..catch) didn't seem to work for certain math
//               functions ( -> popup "Ding-dong !   sqrt: DOMAIN error" ) .
//
//   2005-04-16  Now used in the WOLF GUI for the 'tuning scope' .
//
//   2003-06-23  Reorganized some modules of Spectrum Lab (result: this module)
//
//
// Literature:
//   [SGDSP] = Steven W. Smith,
//          "The Scientist and Engineer's Guide to Digital Signal Processing",
//          Chapter 12, "The Fast Fourier Transform".    www.DSPguide.com .
//          Locally saved by W.B. under c:\Literat1\dspguide\*.pdf .
//
//   [HFTWIN] ("Heinzel, New Flat-Top Windows") :
//          "Spectrum and spectral density estimation by the Discrete Fourier
//           transform (DFT), including a comprehensive list of window
//           functions and some new flat-top windows." (2002-02-15)
//              by G. Heinzel, A. Ruediger, R. Schilling,
//              Max-Planck-Institut fuer Gravitationsphysik, Hannover.
//           Locally saved as
//    C:\literatur\Signal_processing_and_filters\New_FFT_Windowing_Functions_2002.pdf
//---------------------------------------------------------------------------


#include "SWITCHES.H"  // project specific compiler switches ("options")
         // like SWI_FLOAT_PRECISION (1:T_Float=float, 2:T_Float=double).
         // Must be included before anything else !


#include <windows.h>
#include <math.h>
#include <float.h> // _isnan() and other not-really-standardized stuff

#pragma hdrstop   // no precompiled headers after this point

#include "SoundMaths.h"    // header for this module

#include "Utility1.h"      // DEBUGGER_BREAK() etc

#ifndef  SWI_SOUND_TAB_INCLUDED  /* may be defined in SWITCHES.H ... */
 #define SWI_SOUND_TAB_INCLUDED 0
#endif
#if( SWI_SOUND_TAB_INCLUDED ) /* Module SoundTab.cpp included ?      */
 #include "SoundTab.h"  // common tables used by many audio processing modules
 // (most important here: a large SINE table to speed things up)
#endif

#if( SWI_USE_OOURAS_FFT ) /* use Takuya Ooura's FFT (a bit faster than textbook-FFT) ? */
# undef   T_FFT_FLOAT
# define  T_FFT_FLOAT float
# include "FFT_Ooura.c"   // compile T. Ooura's code for SINGLE PRECISION
# undef   T_FFT_FLOAT
# define  T_FFT_FLOAT double
# include "FFT_Ooura.c"   // compile T. Ooura's code for DOUBLE PRECISION
#endif // SWI_USE_OOURAS_FFT ?

#if( SWI_USE_KISS_FFT ) /* use Mark Borgerding's radix-N 'KISS' FFT ? */
# include "kiss_fftr.h" // KISS FFT for REAL input (but complex output, of course!)
# include "kiss_fft.h"  // KISS FFT for complex input
  // Using 'KISS FFT' instead of Ooura's was just a test, 2014-03-23 .
  // See benchmark result in c:\cbproj\SpecLab\fft.cpp . Conclusion:
  // > Stick to Ooura's FFT if you don't need FFT sizes
  // > which are not powers of two !
#endif // SWI_USE_KISS_FFT ?

#if( SWI_SOUNDUTL_USE_PFFFT_DLL ) // try to use J.P.'s Pretty Fast FFT (compiled into a DLL) ?
# include "pffft.h"  // Almost the orignal API of J.P.'s Pretty Fast FFT.
                     // SOME (but NOT ALL) of those functions are implemented
                     //       in the DLL, called via function pointer.
                     // Others (especially the malloc/free - stuff)
                     //       are implemented in pffft_dll_host.h instead !
# include "pffft_dll_common.h" // common part, used by DLL-HOST and DLL-implementation
# include "pffft_dll_host.h"   // special part for the "DLL host" to load the PFFFT-DLL DYNAMICALLY
#endif // SWI_SOUNDUTL_USE_PFFFT_DLL ?

// #pragma warn -8017
#pragma warn -8004  // .. is assigned a value that is never used (well..)
#pragma warn -8080  // .. is declared but never used (.. so what ?)

// Debugging stuff: "breadcrumb" with the last source line passed, plus an error counter.
int  SndMat_iSourceCodeLine = __LINE__;  // last source code line (WATCH this!)
#define DOBINI() SndMat_iSourceCodeLine=__LINE__
 // DOBINI() stores the line number of its call site in SndMat_iSourceCodeLine,
 // so a debugger 'watch' on that variable shows how far execution got.
 // DOBINI() added here 2009-07-10 to find the reason for the "phantom breakpoint"

// Number of math library errors intercepted so far; incremented by _matherr().
long SndMath_i32CountMathErrors = 0;

/***************************************************************************/
int _matherr(struct exception *e)
  // Application-specific handler for errors detected inside math library
  // functions (sqrt, log10, pow, ...).   Added 2008-08-08 for 'Spectrum Lab'
  // to get rid of the runtime library's message box
  //   " * DING-DONG *   sqrt: DOMAIN error" .
  //
  // Input :  e->name = name of the failing math function (C string),
  //          e->type = kind of error (DOMAIN, SING, OVERFLOW, UNDERFLOW, TLOSS).
  // Output:  e->retval = substitute result which the failed math function
  //                      shall deliver to its caller.
  // Return:  always 1, which tells the runtime library "error has been handled".
  // Side effect: SndMath_i32CountMathErrors is incremented on each call.
  //
  // Notes (from the original author's experience with Borland C++Builder):
  //  - A user-supplied _matherr() only has an effect if it is implemented
  //    in a *.C module, NOT in *.CPP (most likely due to C++ name mangling).
  //  - The proper parameter type is "struct exception" (from math.h),
  //    NOT "_exception" .
  //  - Borland's help system calls any use of _matherr beyond suppressing
  //    the error message "obsolete", but C++ try..catch was not able
  //    to catch floating point errors in BCB V4 and V6, so there was
  //    no real alternative.
  //  - Calling RaiseException( EXCEPTION_FLT_INVALID_OPERATION, 0, 0, NULL )
  //    from here was tested once. It did make the error catchable
  //    in the caller's try/catch, but then e->retval is NOT delivered
  //    to the caller, and calling a Win32 API function from here
  //    was considered unsafe.  So RaiseException() is NOT called.
{
  #ifndef DWORD
    #define DWORD unsigned long
  #endif
  // C2N packs four characters into a DWORD, first character in the lowest byte.
  #define C2N(a,b,c,d) ((DWORD)a|(((DWORD)b)<<8)|(((DWORD)c)<<16)|(((DWORD)d)<<24))
  union
   { DWORD dw;
     unsigned char b[4];
   } name4;
  int iChar;

  // Copy at most four leading characters of the function name into a
  // 32-bit value; everything from the string's terminating zero onwards
  // remains zero.  The result can be compared against C2N(..) constants,
  // which avoids a chain of string comparisons.
  name4.dw = 0;
  for( iChar=0; iChar<4; ++iChar )
   { if( e->name[iChar] == '\0' )
      { break;   // end of name reached; never read beyond the terminator
      }
     name4.b[iChar] = e->name[iChar];
   }

  // Pick the "best suited" substitute result, primarily by function name.
  switch( name4.dw )
   { case C2N('l','o','g','1') :  // any name beginning with "log1", e.g. log10(0) or log10(-X)
        // log10 is often used as 20*log10(X) (voltage ratio to dB)
        // or 10*log10(P) (power ratio to dB). With the substitute value
        // below, those formulas give -2000 dB or -1000 dB, respectively:
        // a "very negative magnitude" which never occurs in a real system.
        e->retval = -100.0;
        break;

     case C2N('s','q','r','t') :  // most likely sqrt(-X)
        e->retval = 0.0;
        break;

     case C2N('p','o','w','\0') : // pow(x,y)
        // 2010-05-19: seen with CalculateArbitraryWaveform("cos(pi*x)^1000000"),
        //   call stack CLI_CalcSum_Float() -> CLI_CalcSum2() -> CLI_CalcProd()
        //     -> CLI_CalcPow() -> __org_pow() -> __matherr() -> _matherr() .
        e->retval = 0.0;
        break;

     default:
        // All OTHER functions: at the moment, each possible error type
        // (e->type = DOMAIN, SING, OVERFLOW, UNDERFLOW, TLOSS or anything
        // unknown) is answered with the same substitute result.
        e->retval = 0.0;
        break;
   }

  ++SndMath_i32CountMathErrors; // statistics for the debug-log

  return 1;       /* error has been handled */
} // end _matherr()


/***************************************************************************/
CPROT void SndMat_Init( void )
  // One-time initialisation of this module; call ONCE on startup.
  // If SWI_SOUNDUTL_USE_PFFFT_DLL is non-zero, and no attempt to load the
  // 'Pretty Fast FFT' DLL has been made yet, PFFFT_Host_Init() is called.
  // If the DLL status is not 'loaded ok' afterwards, two warnings
  // are entered in the error history.
  // Without SWI_SOUNDUTL_USE_PFFFT_DLL, this function does nothing.
{
#if( SWI_SOUNDUTL_USE_PFFFT_DLL ) // try to use J.P.'s Pretty Fast FFT (compiled into a DLL) ?
  if( PFFFT_Host_iDLLStatus!=PFFFT_DLL_NOT_LOADED )
   { return;  // loading the DLL has already been tried before
   }

  PFFFT_Host_Init();
  if( PFFFT_Host_iDLLStatus==PFFFT_DLL_LOADED_OK )
   { return;  // fine, the faster FFT is available
   }

  // Failed to load "PFFFT_DLL.DLL" : leave a note in the error history.
  DEBUG_EnterErrorHistory( DEBUG_LEVEL_WARNING, 0, UTL_USE_CURRENT_TIME,
          "Failed to load the Pretty Fast FFT (PFFFT_DLL.DLL) !" );
  DEBUG_EnterErrorHistory( DEBUG_LEVEL_WARNING, 0, UTL_USE_CURRENT_TIME,
          "Using the slower textbook FFT without SIMD instead ." );
#endif // SWI_SOUNDUTL_USE_PFFFT_DLL ?
} // end SndMat_Init()

/***************************************************************************/
CPROT void SndMat_Exit( void )
  // Should be called ONCE when the application exits. Frees resources,
  // and possibly unloads the 'pretty fast FFT' DLL .
  // The only action is a call of PFFFT_Host_Exit(), and only if
  // SWI_SOUNDUTL_USE_PFFFT_DLL is non-zero; otherwise this is a no-op.
  // (PFFFT_Host_Exit is implemented elsewhere; presumably it unloads
  //  the DLL which SndMat_Init() tried to load - to be confirmed there.)
{
#if( SWI_SOUNDUTL_USE_PFFFT_DLL ) // try to use J.P.'s Pretty Fast FFT (compiled into a DLL) ?
  PFFFT_Host_Exit();
#endif
} // end SndMat_Exit()


/***************************************************************************/
void SndMat_ClearFloatArray(T_Float *pfltArray, int iLength )
  // Clears an array with floating point values (sets all cells to 0.0) .
  //   pfltArray : array to clear; a NULL pointer is silently ignored.
  //   iLength   : number of array elements (not bytes) to clear.
  //               Zero or negative lengths leave the array untouched.
  //               (The former "while(iLength--)" loop did not stop
  //                for a negative length, and wrote far beyond the array.)
  // T_Float is float or double, depending on SWI_FLOAT_PRECISION.
{
  int i;

  if( pfltArray==NULL )
   { return;
   }
  for( i=0; i<iLength; ++i )
   { pfltArray[i] = 0.0;
   }
} // end SndMat_ClearFloatArray()

/***************************************************************************/
void SndMat_SwapFloatArrayHalves(float *pfltArray, int iLength )
  // Exchanges the lower half of an array with the upper half, in place:
  //      pfltArray[0 .. iLength/2-1]
  //   <-> pfltArray[iLength/2 .. 2*(iLength/2)-1] .
  // For an odd iLength, the last element stays where it is.
  // A NULL pointer, or a length below two, leaves everything untouched.
{
  int   iHalf, i;
  float fltSaved;

  if( !pfltArray )
   { return;
   }
  iHalf = iLength / 2;    // number of element PAIRS to exchange
  for( i=0; i<iHalf; ++i )
   { fltSaved           = pfltArray[i];
     pfltArray[i]       = pfltArray[i+iHalf];
     pfltArray[i+iHalf] = fltSaved;
   }
} // end SndMat_SwapFloatArrayHalves()

/***************************************************************************/
void SndMat_SwapDoubleArrayHalves(double *pfltArray, int iLength )
  // Same as SndMat_SwapFloatArrayHalves(), but for DOUBLE PRECISION floats:
  // exchanges pfltArray[0 .. iLength/2-1]
  //      with pfltArray[iLength/2 .. 2*(iLength/2)-1] , in place.
  // For an odd iLength, the last element stays where it is.
  // A NULL pointer, or a length below two, leaves everything untouched.
{
  int    iHalf, i;
  double dblSaved;

  if( !pfltArray )
   { return;
   }
  iHalf = iLength / 2;    // number of element PAIRS to exchange
  for( i=0; i<iHalf; ++i )
   { dblSaved           = pfltArray[i];
     pfltArray[i]       = pfltArray[i+iHalf];
     pfltArray[i+iHalf] = dblSaved;
   }
} // end SndMat_SwapDoubleArrayHalves()


/***************************************************************************/
void SndMat_ResampleFloatArray(T_Float *pfltArray, int iSourceLength, int iDestLength )
  // Stretches or shrinks an array IN PLACE, using linear interpolation.
  // Originally used for the FFT-based filter, to adapt the frequency
  // response curve when changing the FFT size .
  //   pfltArray     : the array; must have room for the LARGER
  //                   of iSourceLength and iDestLength elements.
  //   iSourceLength : number of valid elements on entry.
  //   iDestLength   : number of valid elements on return.
  // Neither iSourceLength nor iDestLength may be zero or negative.
  // Such calls (and a NULL pointer) are now rejected without touching
  // the array; formerly they caused a division by zero, or an access
  // to pfltArray[-1] .
  // The samples are handled as T_Float (not 'float' as before),
  // so nothing is truncated when T_Float is a double.
{
  float   fltStretchFactor, fltSourceIndex, fltFraction;
  T_Float fltSrcLeft, fltSrcRight;
  int iDstIdx, iSrcIdx, iStartIdx, iStep;

  if( pfltArray==NULL || iSourceLength<=0 || iDestLength<=0 )
   { return;  // illegal arguments: nothing we could sensibly do
   }
  fltStretchFactor = (float)iSourceLength / (float)iDestLength;

  if( iDestLength > iSourceLength  )  // "stretching" (array gets LARGER) :
   {  // begin at the END of the array: every source cell read is at, or
      // below, the destination cell written, thus not overwritten yet
      iStartIdx = iDestLength-1;
      iStep     = -1;
   }
  else // ( iDestLength <= iSourceLength ) -> "shrinking" (array gets SMALLER) :
   {  // begin at the START of the array; source cells are at, or above,
      // the destination cell
      iStartIdx = 0;
      iStep     = +1;
   }
  for(iDstIdx=iStartIdx; iDstIdx>=0 && iDstIdx<iDestLength; iDstIdx+=iStep)
   { fltSourceIndex = (float)iDstIdx * fltStretchFactor;
     if( fltSourceIndex < 0.0 )
         fltSourceIndex = 0.0;
     if( fltSourceIndex >= iSourceLength )  // limit to the last valid cell
         fltSourceIndex = iSourceLength-1;
     iSrcIdx = (int)fltSourceIndex;
     fltSrcLeft = pfltArray[iSrcIdx];
     if( (iSrcIdx+1) < iSourceLength)
          fltSrcRight = pfltArray[iSrcIdx+1];
     else fltSrcRight = fltSrcLeft;         // no right neighbour: repeat the last value
     // Interpolate between "left" and "right" value :
     fltFraction = fltSourceIndex - (float)iSrcIdx;  // -> fractional index, 0 .. 0.999999
     pfltArray[iDstIdx] = fltSrcLeft * (1.0-fltFraction) + fltSrcRight * fltFraction;
   } // end for(iDstIdx ..

} // end SndMat_ResampleFloatArray()

/***************************************************************************/
void SndMat_ResampleDoubleArray(double *pfltArray, int iSourceLength, int iDestLength )
  // Stretches or shrinks an array of doubles IN PLACE, using linear
  // interpolation.
  // Besides the data type, same function as SndMat_ResampleFloatArray() .
  //   pfltArray     : the array; must have room for the LARGER
  //                   of iSourceLength and iDestLength elements.
  //   iSourceLength : number of valid elements on entry.
  //   iDestLength   : number of valid elements on return.
  // Neither iSourceLength nor iDestLength may be zero or negative.
  // Such calls (and a NULL pointer) are now rejected without touching
  // the array; formerly they caused a division by zero, or an access
  // to pfltArray[-1] .
{
  double dblStretchFactor, dblSourceIndex, dblFraction;
  double dblSrcLeft, dblSrcRight;
  int iDstIdx, iSrcIdx, iStartIdx, iStep;

  if( pfltArray==NULL || iSourceLength<=0 || iDestLength<=0 )
   { return;  // illegal arguments: nothing we could sensibly do
   }
  dblStretchFactor = (double)iSourceLength / (double)iDestLength;

  if( iDestLength > iSourceLength  )  // "stretching" (array gets LARGER) :
   {  // begin at the END of the array: every source cell read is at, or
      // below, the destination cell written, thus not overwritten yet
      iStartIdx = iDestLength-1;
      iStep     = -1;
   }
  else // ( iDestLength <= iSourceLength ) -> "shrinking" (array gets SMALLER) :
   {  // begin at the START of the array; source cells are at, or above,
      // the destination cell
      iStartIdx = 0;
      iStep     = +1;
   }
  for(iDstIdx=iStartIdx; iDstIdx>=0 && iDstIdx<iDestLength; iDstIdx+=iStep)
   { dblSourceIndex = (double)iDstIdx * dblStretchFactor;
     if( dblSourceIndex < 0.0 )
         dblSourceIndex = 0.0;
     if( dblSourceIndex >= iSourceLength )  // limit to the last valid cell
         dblSourceIndex = iSourceLength-1;
     iSrcIdx = (int)dblSourceIndex;
     dblSrcLeft = pfltArray[iSrcIdx];
     if( (iSrcIdx+1) < iSourceLength)
          dblSrcRight = pfltArray[iSrcIdx+1];
     else dblSrcRight = dblSrcLeft;         // no right neighbour: repeat the last value
     // Interpolate between "left" and "right" value :
     dblFraction = dblSourceIndex - (double)iSrcIdx;  // -> fractional index, 0 .. 0.999999
     pfltArray[iDstIdx] = dblSrcLeft * (1.0-dblFraction) + dblSrcRight * dblFraction;
   } // end for(iDstIdx ..

} // end SndMat_ResampleDoubleArray()


/***************************************************************************/
double SndMat_CalculateAngle(double re, double im)
  // Four-quadrant conversion of a complex pair ("I/Q") into a phase angle.
  //   re, im : real and imaginary part of the complex number.
  //   Return : angle in radians.  With ANGLE_RANGE_PLUS_MINUS_180_DEGREES=1
  //            (the setting compiled in below) the range is -pi .. +pi,
  //            i.e. -180 .. +180 degrees; otherwise 0 .. 2*pi .
  //            A positive real value (im=0) gives zero, a negative real
  //            value gives +pi.  If both parts are zero, the result is zero.
  // Revision history:
  //   Jan 13, 2002, by DL4YHF:
  //       Implemented for a phase-sensitive spectrum analyser
  //   Sept  15, 2002:
  //       Copied into AM_FM_DeMod.cpp (sound utilities)
  //   2007-03-11 :
  //       Copied into the FFT-filter-plugin demo .
{
#define ANGLE_RANGE_PLUS_MINUS_180_DEGREES 1  // 1: result_range = -pi..pi = -180..+180
     // (-180..+180 is preferred because angles tend to be +-0.x degrees,
     //  and it looks ugly if the display jumps from "0.1" to "359.9" and back)

  if( im > 0.0 )   // upper half plane: first or second quadrant
   {
     if( re > 0.0 )
      { return atan(im/re);          // first quadrant, 0..90 degrees
      }
     if( re < 0.0 )
      { return atan(im/re) + C_PI;   // second quadrant, 90..180 degrees
      }
     return 0.5 * C_PI;              // re=0, im>0 : exactly 90 degrees
   }

  if( im < 0.0 )   // lower half plane: third or fourth quadrant
   {
#if(ANGLE_RANGE_PLUS_MINUS_180_DEGREES)
     if( re < 0.0 )
      { return atan(im/re) - C_PI;   // third quadrant, -180..-90 degrees
      }
     if( re > 0.0 )
      { return atan(im/re);          // fourth quadrant, -90..0 degrees
      }
     return -0.5 * C_PI;             // re=0, im<0 : exactly -90 degrees
#else
     if( re < 0.0 )
      { return atan(im/re) + C_PI;   // third quadrant, 180..270 degrees
      }
     if( re > 0.0 )
      { return atan(im/re) + 2*C_PI; // fourth quadrant, 270..360 degrees
      }
     return 1.5 *  C_PI;             // re=0, im<0 : exactly 270 degrees
#endif
   }

  // Neither im>0 nor im<0 : a "real" number, on the horizontal axis.
  if( re>=0 )
   { return 0;
   }
  return C_PI;    // negative -> 180 degrees
} // end ..CalculateAngle()


/***************************************************************************/
T_FAST_FLOAT SndMat_CalculateAngleFast(T_FAST_FLOAT x, T_FAST_FLOAT y)
{ // Fast approximation of atan2(y,x) "with self normalization".
  //   x = Re(z), y = Im(z) of a complex number z.
  //   Returned value range is  -pi..pi =  -180 .. +180 degrees.
  //
  // Based on an article by Jim Shima, found at
  //       http://www.dspguru.com/comp.dsp/tricks/alg/fxdatan2.htm .
  // Principle:
  //  A ratio r is formed from x and |y| in a way which normalizes itself,
  //  depending on the half plane in which the complex number resides:
  //
  //     right half plane (x>=0):   r = (x-|y|) / (x+|y|)      (1)
  //     left  half plane (x<0) :   r = (x+|y|) / (|y|-x)      (3)
  //
  //  The angle for the UPPER half plane is then estimated as
  //
  //     theta1 =   pi/4 - pi/4*r                              (2)
  //     theta2 = 3*pi/4 - pi/4*r                              (4)
  //
  //  and negated if y is negative (i.e. z in quadrant III or IV).
  //  According to the article, the maximum error of (2) and (4)
  //  is a little less than 0.07 rad.  Since the function is odd,
  //  a higher degree polynomial only needs the odd powers:
  //
  //     theta1 = 0.1963 * r^3 - 0.9817 * r +   pi/4           (2a)
  //     theta2 = 0.1963 * r^3 - 0.9817 * r + 3*pi/4           (4a)
  //
  //  with a maximum error of about .01 rad (=~ 0.6 degrees).
  //  The more accurate variant is used if ATAN2_HIGH_ACCURACY is non-zero.
#define ATAN2_HIGH_ACCURACY 1
  static T_FAST_FLOAT fltQuarterPi      = C_PI/4;
  static T_FAST_FLOAT fltThreeQuarterPi = 3*C_PI/4;
  T_FAST_FLOAT fltNorm, fltRatio, fltOffset, fltAngle;
  // The tiny constant added to |y| is a kludge to prevent a 0/0 condition.
  // (It was 1e-10 once; 1e-30 gives a more accurate result.)
  T_FAST_FLOAT fltAbsY = fabs(y)+1e-30;

  // 2013-11-25: Suspected a problem with 'very large' x or y
  //    in C:\cbproj\ColorDF\ColourDF.cpp .  Since then, large inputs are
  //    scaled down by their 'Manhattan length' first.  The original author
  //    noted that this had no visible effect, but doesn't hurt either.
  fltNorm = fabs(x) + fltAbsY;
  if( fltNorm > 1.0 )
   { x /= fltNorm;
     y /= fltNorm;
     fltAbsY = fabs(y)+1e-30;
   }

  // Self-normalizing ratio, and the constant term of the polynomial:
  if( x>=0 )
   { fltRatio  = (x - fltAbsY) / (x + fltAbsY);   // (equation 1)
     fltOffset = fltQuarterPi;
   }
  else // x<0
   { fltRatio  = (x + fltAbsY) / (fltAbsY - x);   // (equation 3)
     fltOffset = fltThreeQuarterPi;
   }

#if(ATAN2_HIGH_ACCURACY)
  fltAngle = fltOffset - 0.9817 * fltRatio + 0.1963 * fltRatio*fltRatio*fltRatio; // (2a),(4a)
#else
  fltAngle = fltOffset - fltQuarterPi * fltRatio;   // (equations 2 and 4)
#endif // (ATAN2_HIGH_ACCURACY)

  // So far the angle applies to the upper half plane; mirror it if necessary.
  return (y < 0) ? -fltAngle : fltAngle;
} // end SndMat_CalculateAngleFast()



/***************************************************************************/
void SndMat_RunComplexFIR( // complex FIR-filter (usually a low pass)
      int       iNrCoeffs,   // Length of filter queue + number of coeffs
      T_Float   *pfltCoeffs, // pointer to filter coefficients   [iNrCoeffs]
      T_Complex *pcpxQueue,  // pointer to filter queue (memory) [iNrCoeffs]
      int       *piQueueIdx, // index for circular filter queue, 0..iNrCoeffs-1
      T_Complex *pcplxValue) // reference to in- and output value
      // ( T_Complex defined in \cbproj\SoundUtl\SoundTab.h )
  // Processes ONE complex sample: the input value is stored in the circular
  // queue, then real and imaginary part are both convolved with the same
  // (real) coefficients.  The newest sample is weighted with pfltCoeffs[0],
  // the next older one with pfltCoeffs[1], and so on.
  // The result replaces the input in *pcplxValue, and *piQueueIdx
  // is updated for the next call.
{
 T_Complex cpxSum;
 int iTap, iQueuePos;

   // Step BACK in the circular queue, to the cell for the new sample:
   iQueuePos = *piQueueIdx - 1;
   if( iQueuePos < 0 )        // wrap from the first to the last cell
    { iQueuePos = iNrCoeffs-1;
    }
   *piQueueIdx = iQueuePos;
   pcpxQueue[iQueuePos] = *pcplxValue;   // place filter "input" in the queue

   // Multiply-and-accumulate, from the newest to the oldest sample:
   cpxSum.re = 0.0;
   cpxSum.im = 0.0;
   for( iTap=0; iTap<iNrCoeffs; ++iTap )
    {
     cpxSum.re += ( pcpxQueue[iQueuePos].re * pfltCoeffs[iTap] );
     cpxSum.im += ( pcpxQueue[iQueuePos].im * pfltCoeffs[iTap] );
     if( (++iQueuePos) >= iNrCoeffs ) // wrap from the last to the first cell
      { iQueuePos = 0;
      }
    }

   *pcplxValue = cpxSum;      // filter output (re+im) back to the caller

} // end SndMat_RunComplexFIR(..)

/***************************************************************************/
void SndMat_MultiplyHanningWindow( float *pfltArray, int iLength )
  // Multiplies an array, in place, with a HANN(ing) window:
  //    w[i] = 0.5 - 0.5 * cos( 2*pi * i / (iLength-1) )  ,  i = 0..iLength-1 .
  //   pfltArray : samples to be windowed; a NULL pointer is ignored.
  //   iLength   : number of samples; nothing happens if zero or negative.
  // Changes against the previous implementation:
  //  - iLength==1 does not cause a division by zero anymore.
  //    (The single sample is multiplied by w[0] = 0, as before.)
  //  - The angle is calculated from the sample index, in double precision,
  //    instead of adding up a single precision increment.  This avoids
  //    rounding errors which accumulated over long windows.
{ int i;
  double dblAngleIncr;

   if( pfltArray==NULL || iLength<=0 )
    { return;
    }
   if( iLength > 1 )
    { dblAngleIncr = (2.0 * C_PI) / (double)(iLength-1);
    }
   else  // single point: the angle is zero anyway
    { dblAngleIncr = 0.0;
    }

   for(i=0; i<iLength; i++) // multiply the table with FFT WINDOW FUNCTION ..
    {
     pfltArray[i] *= ( .5 - .5*cos( (double)i * dblAngleIncr ) );
    }
} // end SndMat_MultiplyHanningWindow()

/***************************************************************************/
void SndMat_CalcComplexFft( // ... for SINGLE PRECISION floating point values
          int iNrOfPoints,    // N =  number of points in the DFT *AND* in the time domain
          float *pfltRe,      // REX[] = real parts of input and output
          float *pfltIm )     // IMX[] = imaginary parts of input and output
 //  THE FAST FOURIER TRANSFORM  - inspired by [SGDSP] TABLE 12-3 or -4 .
 //     No cluttered classes, global vars, windowing, averaging and whatsoever-
 //     Just the classic complex FFT (complex input, complex output) !
 //  Upon entry, N contains the number of points in the DFT, REX[ ] and
 //     IMX[ ] contain the real and imaginary parts of the input.
 //     All signals run from 0 to N-1.
 //  Upon return, REX[0..N-1] & IMX[0..N-1] contain the DFT output:
 //     The frequencies between 0 and N/2 are positive,
 //     while the frequencies between N/2 and N-1 are negative.
 //     Remember, the frequency spectrum of a discrete signal is
 //     periodic, making the negative frequencies between N/2 and N-1
 //     the same as between -N/2 and 0. The samples at 0 and N/2
 //     straddle the line between positive and negative.
 // More specific: Upon return,
 //   - pfltXX[0] contains the DC component
 //   - pfltXX[1] contains the smallest positive frequency
 //   - pfltXX[N/2-1] contains the largest positive frequency
 //   - pfltXX[N/2] is a special case, see TEST RESULTS further below !
 //   - pfltXX[N/2+1] is the bin with the "most negative" frequency
 //   - pfltXX[N-1] contains the smallest negative frequency ("small but negative")
 // Output range:  A pure sine wave ... A*sin(wt)... will produce an
 //                fft output peak of (N*A/4)^2  where N is FFT_SIZE.
 //   [ this is for a HANN- or similar window, with m_fltWindowAvrg=0.5 ]
 //
 // TEST RESULTS (copied from PhaseAmplMeter.cpp, 2010-05-02) :
 //  *  TEST A: feed a "DC" test signal into the FFT :
 //         for(i=0; i<8192; ++i)
 //          { pPAM->fltFftBuf_re[i] = 1.0;
 //            pPAM->fltFftBuf_im[i] = 2.0;
 //          }
 //     Test result (directly after calling SndMat_CalcComplexFft(8192,..) ) :
 //     pPAM->fltFftBuf_re[0] =  8192
 //     pPAM->fltFftBuf_im[0] = 16384  (all other bins were ZERO)
 //
 //  *  TEST B: feed a test signal with a "positive" (??) frequency, at fs/4,
 //       into the FFT :
 //       const int t4[4]={ 0,1,0,-1 };
 //       for(i=0; i<pPAM->m_i32SamplesPerDFT; ++i)
 //        { pPAM->fltFftBuf_re[i] = t4[(i+1)&3];  // "re" leads "im" ...
 //          pPAM->fltFftBuf_im[i] = t4[(i+0)&3];  // (not sure if this should be
 //          // a positive or negative frequency.. see the NCO multiplication..)
 //        }
 //      Test result (directly after calling SndMat_CalcComplexFft(8192,..) ) :
 //      pPAM->fltFftBuf_re[2048] =  8192
 //      pPAM->fltFftBuf_im[2048] =   0     (all other bins were ZERO)
 //
 //  *  TEST C: feed a test signal with a "negative" (??) frequency, at fs/4,
 //             into the FFT ?
 //       const int t4[4]={ 0,1,0,-1 };
 //       for(i=0; i<pPAM->m_i32SamplesPerDFT; ++i)
 //        { pPAM->fltFftBuf_re[i] = t4[(i+0)&3];  // "re" LAGS "im" ...
 //          pPAM->fltFftBuf_im[i] = t4[(i+1)&3];  // (not sure if this should be
 //          // a positive or negative frequency.. see the NCO multiplication..)
 //        }
 //      Test result (directly after calling SndMat_CalcComplexFft(8192,..) ) :
 //      pPAM->fltFftBuf_re[6144] =   0
 //      pPAM->fltFftBuf_im[6144] = 8192   (all other bins were ZERO)
 //
 //  *  TEST D: feed a test signal with "the largest possible" frequency, at fs/2,
 //       into the FFT. Note: It's impossible to say if
 //       this frequency is "negative" or "positive". Go figure....
 //        for(i=0; i<pPAM->m_i32SamplesPerDFT; ++i)
 //         { pPAM->fltFftBuf_re[i] = (i&1) ? 1 : -1;
 //           pPAM->fltFftBuf_im[i] = 0;
 //           // Try to shift the phase for the Q-channel by +90 or -90 :
 //           //     the result is always zero .. so no "leading" nor "lagging" !
 //           // Conclusion: The complex frequency bin at index <FftSize/2>
 //           // is 'something special', just as well as the "DC" bin at index zero;
 //           // but for real-world application the "DC" bin is much more important
 //           // than this 'maximum-possible-frequency' bin (at the Shannon limit).
 //         }
 //       Test result (directly after calling SndMat_CalcComplexFft(8192,..) ) :
 //       pPAM->fltFftBuf_re[4096] = -8192
 //       pPAM->fltFftBuf_im[4096] =   0   (all other bins were ZERO)
 //
 // Benchmarks on a HP laptop, Intel i7 CPU in "economy" power setting,
 //               32768 complex samples in/out :
 //  PFFFT (Pretty Fast FFT by Julien Pommier) : 1.17 ms / FFT (using SIMD instructions, only possible in GCC but not Borland)
 //  FFTPACK (old Fortran code converted to C) : 2.45 ms / FFT
 //  original SndMat_CalcComplexFft( 2k)       : 4.7 ms  / FFT
 //  original SndMat_CalcComplexInverseFft(32k): 4.7 ms  / FFT (almost the same as SndMat_CalcComplexFft)

{  // begin SndMat_CalcComplexFft() ...

#if( SWI_SOUNDUTL_USE_PFFFT_DLL ) // Pretty Fast FFT supported (in a DLL) ?
  PFFFT_Setup *pfFFT; // instance data for a ("PF"-)FFT with a certain size
  int   nBytes, iCplx, iFloat;
  float *pfltInput, *pfltOutput, *pfltWorkArea;
  BOOL  ok = FALSE;
#endif // SWI_SOUNDUTL_USE_PFFFT_DLL ?

#if( SWI_USE_OOURAS_FFT ) // Use Takuya Ooura's FFT (fftsg_h.c) :
  // Note that much in contrast to the FFT from the 'DSP Guide',
  //  Ooura's FFT uses a single array with complex numbers .
  //  For MANY (if not all) applications, this is more convenient,
  //  but -unfortunately- utterly incompatible with the older FFT  :-(
  float *pfltTemp, *pflt;
  int i;
#endif // SWI_USE_OOURAS_FFT ?


#if ( ! SWI_USE_OOURAS_FFT ) && ( ! SWI_USE_KISS_FFT )
  // Here, for comparison and because of THIS FFT's simplicity,
  //       a clean, textbook style FFT based on the 'DSP Guide' :
  int I,J,JM1,K,L,M,LE,LE2, IP;
  int NM1 = iNrOfPoints - 1;
  int ND2 = iNrOfPoints / 2;
  float UR, UI, SR, SI, TR, TI;
#endif // ( ! SWI_USE_OOURAS_FFT ) && ( ! SWI_USE_KISS_FFT )


#if( SWI_SOUNDUTL_USE_PFFFT_DLL ) // try to use J.P.'s Pretty Fast FFT (compiled into a DLL)..
  if( PFFFT_Host_iDLLStatus==PFFFT_DLL_LOADED_OK) // Pretty Fast FFT successfully loaded ?
   { pfFFT = pffft_new_setup( iNrOfPoints, PFFFT_COMPLEX );
     if( pfFFT != NULL )  // the requested size seems to be supported...
      { nBytes = iNrOfPoints * 2/*complex!*/ * sizeof(float);
        if( (pfltInput=pffft_aligned_malloc(nBytes)) != NULL )
         { if( (pfltOutput=pffft_aligned_malloc(nBytes)) != NULL )
            { if( (pfltWorkArea=pffft_aligned_malloc(nBytes)) != NULL )
               { for( iCplx=iFloat=0; iCplx<iNrOfPoints; ++iCplx )
                  { pfltInput[ iFloat++ ] = pfltRe[iCplx];
                    pfltInput[ iFloat++ ] = pfltIm[iCplx];
                  }
                 pffft_transform_ordered(pfFFT, pfltInput, pfltOutput,
                                                pfltWorkArea, PFFFT_FORWARD);
                 for( iCplx=iFloat=0; iCplx<iNrOfPoints; ++iCplx )
                  { pfltRe[iCplx] = pfltOutput[ iFloat++ ];
                    pfltIm[iCplx] = pfltOutput[ iFloat++ ];
                  }
                 ok = TRUE;
                 pffft_aligned_free( pfltWorkArea );
               }
              pffft_aligned_free( pfltOutput );
            }
           pffft_aligned_free( pfltInput ); 
         }
        pffft_destroy_setup(pfFFT); // free instance (twiddle factors, etc)
      }
   }    // end if < Pretty Fast FFT available >
  if( ok )
   { return; // job successfully completed by PFFFT !
   }
#endif // SWI_SOUNDUTL_USE_PFFFT_DLL ?

#if ( ! SWI_USE_OOURAS_FFT ) && ( ! SWI_USE_KISS_FFT )
  // ex: m = CINT(LOG(N%)/LOG(2))
  M = 0; I=iNrOfPoints; while(I>1){ ++M; I = (I>>1); }   // -> m = log2( n )
  J = ND2;

  DOBINI();

  for(I=1; I<NM1; ++I)                  // Bit reversal sorting
   {
     if(I<J) // 1120   IF I% >= J% THEN GOTO 1190
      { TR = pfltRe[J];
        TI = pfltIm[J];
        pfltRe[J] = pfltRe[I];   // 2008-08-19: Crashed here with an access violation,
           // after switching from the "default config" to "SDR-IQ with audio output".
        pfltIm[J] = pfltIm[I];
        pfltRe[I] = TR;
       pfltIm[I] = TI;
      }
     K = ND2;    // 1190

     while(K<=J) // 1200   IF K% > J% THEN GOTO 1240
      { J = J - K;
        K = K / 2;
      }          // 1230  GOTO 1200
     J += K;     // 1240   J% = J%+K%
   }             // 1250 NEXT I%

  DOBINI();

  for( L=1; L<=M; ++L)             // 1270 Loop for each stage
   {
     LE = 1<<L;   // 1280  LE% = CINT(2^L%)
     LE2 = LE/2;  // 1290  LE2% = LE%/2
     UR = 1;
     UI = 0;
     DOBINI();
     // Use the standard trig functions instead of table lookup.
     // (these calculations are rarely done; not worth to eliminate sin+cos here)
     SR = cos(C_PI/(float)LE2);   // Calculate sine & cosine values
     SI = -sin(C_PI/(float)LE2);
     for(J=1; J<=LE2; ++J)        // 1340 Loop for each sub DFT
      { JM1 = J-1;
        DOBINI();
        for(I=JM1; I<=NM1; I+=LE) // 1360 Loop for each butterfly
         { IP = I+LE2;
           TR = pfltRe[IP]*UR - pfltIm[IP]*UI;  // Butterfly calculation
           TI = pfltRe[IP]*UI + pfltIm[IP]*UR;
           pfltRe[IP] = pfltRe[I]-TR;
           pfltIm[IP] = pfltIm[I]-TI;
           pfltRe[I]  = pfltRe[I]+TR;
           pfltIm[I]  = pfltIm[I]+TI;
         } // NEXT I
        DOBINI();
        TR = UR;                  // 1450
        UR = TR*SR - UI*SI;
        UI = TR*SI + UI*SR;
      } // NEXT J
     DOBINI();
   } // NEXT L
#endif // < use 'plain textbook-style FFT from the 'DSP Guide' > ?

#if( SWI_USE_OOURAS_FFT ) // Use Takuya Ooura's FFT (fftsg_h.c) :
  // Note that much in contrast to the FFT from the 'DSP Guide',
  //  Ooura's FFT uses a single array with complex numbers .
  //  For MANY (if not all) applications, this is more convenient,
  //  but -unfortunately- utterly incompatible with the older FFT  :-(
  pfltTemp = (float*)malloc( ( 2*iNrOfPoints ) * sizeof(float) );   // MUST BE THREAD-SAFE / reentrant !
  if( pfltTemp != NULL )
   {
     // Combine the input (separate real and imaginary parts) .
     // It remained unclear why the imaginary part had to be inverted
     // before and after the FFT, to produce the same result
     // with Ooura's FFT as with the simple FFT code from the DSP-Guide !
     pflt = pfltTemp;
     for(i=0;i<iNrOfPoints; ++i)
      { *pflt++ =  pfltRe[i];
        *pflt++ = -pfltIm[i];   // !!!?!
      }

     cdft_flt( 2*iNrOfPoints, 1/*forward*/, pfltTemp );
     // Complex Discrete Fourier Transform  .
     // > Usage:  cdft_flt(2*n, 1, a) : forward FFT, single precision float
     // > Parameters:
     // >   2*n            :data length (int)
     // >                   n >= 1, n = power of 2
     // >   a[0...2*n-1]   :input/output data (float *)
     // >                   input data
     // >                       a[2*j] = Re(x[j]),
     // >                       a[2*j+1] = Im(x[j]), 0<=j<n
     // >                   output data
     // >                       a[2*k] = Re(X[k]),
     // >                       a[2*k+1] = Im(X[k]), 0<=k<n
     //

     // Split up the real and imaginary part again:
     pflt = pfltTemp;
     for(i=0;i<iNrOfPoints; ++i)
      { pfltRe[i] =  *pflt++;
        pfltIm[i] = -*pflt++;   // !!
      }
     // Remember, the output of the complex FFT from 'DSP-Guide' chapter 12 was this :
     //   - pfltXX[1]     contained the smallest positive frequency
     //   - pfltXX[N/2-1] contained the largest positive frequency
     //   - pfltXX[N/2]   contained the most negative frequency ("very negative")
     //   - pfltXX[N-1]   contained the smallest negative frequency ("small but negative")
     //   - pfltXX[0]     obviously contained the "DC" bin (?)
     free( pfltTemp );    // clean up the temporary 'complex' array WITHOUT guard-area
   }
  else
   { DEBUGGER_BREAK();   // heavens no; running out of memory on a PC ?!
   }

#endif // SWI_USE_OOURAS_FFT ?


#if( SWI_USE_KISS_FFT ) /* use Mark Borgerding's radix-N 'KISS' FFT ? */
  int i, nFreqBins;
  kiss_fft_cfg kiss_FFT_object; // black box created by kiss_fft_alloc()
  kiss_fft_cpx  *complexFreqBins;
  kiss_FFT_object = kiss_fft_alloc( iNrOfPoints, 0/*not inverse*/, NULL, NULL );
  nFreqBins = iNrOfPoints;
  complexFreqBins = malloc( nFreqBins * sizeof(kiss_fft_cpx) );
  if( complexFreqBins != NULL )
   {
     for(i=0; i<nFreqBins; ++i)
      { complexFreqBins[i].r = pfltRe[i];
        complexFreqBins[i].i = pfltRe[i];
      }
     kiss_fft( kiss_FFT_object, complexFreqBins, complexFreqBins );
     // > kiss_fft(cfg, const kiss_fft_cpx *fin, kiss_fft_cpx *fout ) :
     // >  Perform an FFT on a complex input buffer.
     // >  for a forward FFT,
     // >     fin should be  f[0] , f[1] , ... ,f[nfft-1]
     // >     fout will be   F[0] , F[1] , ... ,F[nfft-1]
     // >  Note that each element is complex and can be accessed like f[k].r and f[k].i
     // >
     // To avoid having to re-write much of the application,
     // split and re-arrange the complex frequency bins
     // as specified in SndMat_CalcComplexFft() :
     for(i=0; i<nFreqBins; ++i)
      { pfltRe[i] = complexFreqBins[i].r;
        pfltIm[i] = complexFreqBins[i].i;
      }
     free( complexFreqBins );
   }
  kiss_fft_free( kiss_FFT_object );
#endif // SWI_USE_KISS_FFT ?

  DOBINI();
} // end SndMat_CalcComplexFft()  [ for SINGLE PRECISION, i.e. 4-byte floating point ]


/***************************************************************************/
void SndMat_CalcComplexFft_Double(  // double-precision counterpart of SndMat_CalcComplexFft()
          int iNrOfPoints,  // N = number of points, in the time domain as well as in the DFT
          double *pdblRe,   // REX[] = real parts; input, replaced by the output
          double *pdblIm )  // IMX[] = imaginary parts; input, replaced by the output
  // In-place complex FFT on two separate arrays (real / imaginary part),
  // each indexed from 0 to N-1 .
  // Step 1: the samples are re-ordered by bit-reversed index.
  // Step 2: log2(N) butterfly stages are run; the rotation factor of each
  //         stage is calculated once with cos() and sin(), and is then
  //         advanced by a complex multiplication per sub-DFT.
  // Kept deliberately simple (no tables, no optimisation).
  // NOTE(review): presumably N must be a power of two - confirm with the callers.
{
  int iSample, iReversed, iBit;             // used for the bit-reversal sorting
  int iStage, nStages, iSpan, iHalfSpan;    // stage counter and butterfly geometry
  int iTwiddle, iUpper, iLower;             // sub-DFT index and the two butterfly 'legs'
  int iLast = iNrOfPoints - 1;
  int iHalf = iNrOfPoints / 2;
  double dblRotRe, dblRotIm;     // current rotation factor  (real, imaginary)
  double dblStepRe, dblStepIm;   // rotation increment per sub-DFT
  double dblTmpRe, dblTmpIm;     // scratch values

  DOBINI();

  // Number of stages = log2( N ), found by counting right-shifts :
  nStages = 0;
  for( iSample=iNrOfPoints; iSample>1; iSample>>=1 )
   { ++nStages;
   }
  iReversed = iHalf;

  DOBINI();
  for( iSample=1; iSample<iLast; ++iSample )   // bit reversal sorting
   {
     if( iSample < iReversed )  // exchange each pair only once
      { dblTmpRe = pdblRe[iReversed];
        dblTmpIm = pdblIm[iReversed];
        pdblRe[iReversed] = pdblRe[iSample];
        pdblIm[iReversed] = pdblIm[iSample];
        pdblRe[iSample]   = dblTmpRe;
        pdblIm[iSample]   = dblTmpIm;
      }
     // Advance the bit-reversed counter: clear the leading 'one' bits
     // (beginning with the most significant), then set the next bit.
     for( iBit=iHalf; iBit<=iReversed; iBit/=2 )
      { iReversed -= iBit;
      }
     iReversed += iBit;
   }

  DOBINI();
  for( iStage=1; iStage<=nStages; ++iStage )   // one pass per FFT stage
   {
     iSpan     = 1 << iStage;   // distance between butterflies using the same rotation factor
     iHalfSpan = iSpan / 2;     // distance between the two legs of one butterfly
     dblRotRe  = 1;
     dblRotIm  = 0;
     // Plain trig functions, no table lookup: only called once per stage.
     dblStepRe =  cos(C_PI/(double)iHalfSpan);
     dblStepIm = -sin(C_PI/(double)iHalfSpan);
     for( iTwiddle=0; iTwiddle<iHalfSpan; ++iTwiddle )   // one pass per sub-DFT
      {
        for( iUpper=iTwiddle; iUpper<=iLast; iUpper+=iSpan )  // one pass per butterfly
         { iLower   = iUpper + iHalfSpan;
           dblTmpRe = pdblRe[iLower]*dblRotRe - pdblIm[iLower]*dblRotIm;
           dblTmpIm = pdblRe[iLower]*dblRotIm + pdblIm[iLower]*dblRotRe;
           pdblRe[iLower] = pdblRe[iUpper]-dblTmpRe;
           pdblIm[iLower] = pdblIm[iUpper]-dblTmpIm;
           pdblRe[iUpper] = pdblRe[iUpper]+dblTmpRe;
           pdblIm[iUpper] = pdblIm[iUpper]+dblTmpIm;
         }
        // Rotate the factor for the next sub-DFT (complex multiplication) :
        dblTmpRe = dblRotRe;
        dblRotRe = dblTmpRe*dblStepRe - dblRotIm*dblStepIm;
        dblRotIm = dblTmpRe*dblStepIm + dblRotIm*dblStepRe;
      }
   }
  DOBINI();

} // end SndMat_CalcComplexFft_Double()


/***************************************************************************/
void SndMat_SortComplexFftForIncreasingFreqBins(
          int iNrOfPoints,    // N = number of points in the DFT *AND* in the time domain
          float *pfltRe,      // REX[] = real parts, re-ordered in place
          float *pfltIm )     // IMX[] = imaginary parts, re-ordered in place
  // Re-orders the output of SndMat_CalcComplexFft() by exchanging the two
  // halves of each array, using SndMat_SwapFloatArrayHalves() .
  // Intended layout afterwards (frequency increases with the array index) :
  //   - index [0]     : most negative frequency (?)
  //   - index [N/2-1] : smallest negative frequency
  //   - index [N/2]   : DC component
  //   - index [N/2+1] : smallest positive frequency
  //   - index [N-1]   : largest positive frequency
{
   // Real and imaginary parts are stored in separate arrays,
   // so each of them is treated on its own :
   SndMat_SwapFloatArrayHalves( pfltRe, iNrOfPoints );
   SndMat_SwapFloatArrayHalves( pfltIm, iNrOfPoints );
} // end SndMat_SortComplexFftForIncreasingFreqBins()

/***************************************************************************/
void SndMat_SortComplexFftForIncreasingFreqBins_Double(
          int iNrOfPoints,    // N = number of points in the DFT *AND* in the time domain
          double *pfltRe,     // REX[] = real parts, re-ordered in place
          double *pfltIm )    // IMX[] = imaginary parts, re-ordered in place
  // Double-precision counterpart of SndMat_SortComplexFftForIncreasingFreqBins():
  // exchanges the two halves of each array, using SndMat_SwapDoubleArrayHalves() .
{
   // Real and imaginary parts are stored in separate arrays,
   // so each of them is treated on its own :
   SndMat_SwapDoubleArrayHalves( pfltRe, iNrOfPoints );
   SndMat_SwapDoubleArrayHalves( pfltIm, iNrOfPoints );
} // end SndMat_SortComplexFftForIncreasingFreqBins_Double()


/***************************************************************************/
void SndMat_CalcComplexInverseFft(
       int iNrOfPoints, // N = number of points, in the frequency domain and IN THE TIME DOMAIN
       float *pfltRe,   // REX[] = input: real parts of frequency domain, result: re(time domain)
       float *pfltIm )  // IMX[] = input: imag. part of frequency domain, result: im(time domain)
 //  INVERSE FFT FOR COMPLEX SIGNALS  - inspired by [SGDSP] TABLE 12-5 .
 //  Method: negate the imaginary parts, run the FORWARD complex FFT,
 //          then negate the imaginary parts again and divide everything by N.
 //
 //  Expected input (both arrays run from 0 to N-1) :
 //    [0]          = DC,
 //    [1..N/2-1]   = positive frequencies, higher index = higher frequency,
 //    [N/2]        = "the most negative possible frequency",
 //    [N/2+1..N-1] = remaining negative frequencies,
 //                   higher index = "less negative" frequency.
 //
 //  Upon return, REX[ ] and IMX[ ] contain the complex time domain,
 //    again running from 0 to N-1.
 //  Used (for example) in \Digimodes\hell_mod.cpp  (for Fourier Hell),
 //                        \SoundUtl\FftFilter.cpp  (for I/Q output),
 //
 //  Benchmarks: Similar as for SndMat_CalcComplexFft() [details THERE] .
{
  int   iBin;
  float fltScale;

  DOBINI();

  // Step 1 : change the sign of IMX[ ]
  iBin = 0;
  while( iBin < iNrOfPoints )
   { pfltIm[iBin] = -pfltIm[iBin];
     ++iBin;
   }

  DOBINI();
  // Step 2 : forward FFT, in place
  SndMat_CalcComplexFft( iNrOfPoints, pfltRe, pfltIm );
  DOBINI();

  // Step 3 : divide the time domain by N, and change the sign of IMX[ ] again
  fltScale = 1.0 / (float)iNrOfPoints;
  iBin = 0;
  while( iBin < iNrOfPoints )
   { pfltRe[iBin] =  pfltRe[iBin] * fltScale;
     pfltIm[iBin] = -pfltIm[iBin] * fltScale;
     ++iBin;
   }
  DOBINI();

} // end SndMat_CalcComplexInverseFft()

/***************************************************************************/
void SndMat_CalcComplexInverseFft_Double(  // double-precision counterpart of SndMat_CalcComplexInverseFft()
       int iNrOfPoints, // N = number of points, in the frequency domain and IN THE TIME DOMAIN
       double *pfltRe,   // REX[] = input: real parts of frequency domain, result: re(time domain)
       double *pfltIm )  // IMX[] = input: imag. part of frequency domain, result: im(time domain)
 //  Method: negate the imaginary parts, run the FORWARD complex FFT
 //          ( SndMat_CalcComplexFft_Double ), then negate the imaginary
 //          parts again and divide everything by N.
 //  Array layout as described for SndMat_CalcComplexInverseFft() .
{
  int    iBin;
  double dblScale;

  DOBINI();

  // Step 1 : change the sign of IMX[ ]
  iBin = 0;
  while( iBin < iNrOfPoints )
   { pfltIm[iBin] = -pfltIm[iBin];
     ++iBin;
   }

  DOBINI();
  // Step 2 : forward FFT, in place
  SndMat_CalcComplexFft_Double( iNrOfPoints, pfltRe, pfltIm );
  DOBINI();

  // Step 3 : divide the time domain by N, and change the sign of IMX[ ] again
  dblScale = 1.0 / (double)iNrOfPoints;
  iBin = 0;
  while( iBin < iNrOfPoints )
   { pfltRe[iBin] =  pfltRe[iBin] * dblScale;
     pfltIm[iBin] = -pfltIm[iBin] * dblScale;
     ++iBin;
   }
  DOBINI();

} // end SndMat_CalcComplexInverseFft_Double()


/***************************************************************************/
void SndMat_CalcRealFft(
          int iNrOfPoints, // number of points in the time domain (input), 2^N
          float *pfltRe,   // the real input signal, also used as result (real part)
          float *pfltIm )  // output, imaginary part
 //  FFT FOR REAL SIGNALS  - inspired by [SGDSP] TABLE 12-7 .
 //  Purpose: transforms a real-valued time domain signal into the
 //           frequency domain, in place.
 //  Upon entry, iNrOfPoints contains the number of points in the "DFT" (oh really?!),
 //              pfltRe[0..iNrOfPoints-1] contains the real input signal,
 //              while values in pfltIm[ ] are ignored.
 //              The INPUT signals run from 0 to iNrOfPoints-1  .
 // Upon return, pfltRe[ ] & pfltIm[ ] contain the DFT output.
 // Output range:  A pure sine wave ... A*sin(wt)... will produce an
 //                fft output peak of (N*A/4)^2  where N is FFT_SIZE.
 //   [ this is for a HANN- or similar window, with m_fltWindowAvrg=0.5 ]
 //
 //  Note: The output signals run from  0...iNrOfPoints/2 !
 //        A "1024 point REAL FFT" produces 513(!) POINTS in re[]
 //                                     and 513(!) POINTS in im[] !
 //
 //  Two implementations, selected at compile time by SWI_USE_KISS_FFT :
 //   - without KISS: half the samples go into the real part, half into the
 //     imaginary part, an N/2-point SndMat_CalcComplexFft() is run, and the
 //     result is decomposed and recombined with one final butterfly stage.
 //     This variant WRITES to pfltRe[] and pfltIm[] at indices up to
 //     iNrOfPoints-1 (see the last stage below), even though only
 //     0..iNrOfPoints/2 are described as output above.
 //   - with KISS: kiss_fftr() is called, and only the first iNrOfPoints/2+1
 //     entries of pfltRe[] and pfltIm[] are written.
{

#if( ! SWI_USE_KISS_FFT ) /* do NOT use Mark Borgerding's radix-N 'KISS' FFT ? */
  int I, IM, IP, IP2, IPM, J,JM1, LE, LE2, NH, NM1, ND2, N4;
  float UR, UI, SR, SI, TR, TI;

#if( SWI_DEBUG_2012_04_11 )
  // Debugging aids, only compiled for the hunt for the 'NAN' bug.
  // All of them are 'static', i.e. shared by all callers of this function.
  static CRITICAL_SECTION myCS;
  static BOOL   critter_initialized = FALSE;
  static volatile int instance_count = 0;
  static int  instance_npoints[4]   = { 0,0,0,0 };
  static long instance_thread_id[4] = { 0,0,0,0 };  // compare with SndThd_i32ThreadID, etc
#endif  // SWI_DEBUG_2012_04_11 ?

    // Info: "The Scientist and Engineer's Guide to Digital Signal Processing",
    //        www.DSPguide.com, chapter 12 :
    // Even/odd decomposition for the REAL-input FFT
    // -----------------------------------------------------------------
    // > The input signal is broken in by half using an interlaced decomposition.
    // > The N/2 even points are placed into the real part of the
    // > time domain signal, while the N/2 odd points go into the imaginary part.
    // > An N/2 point FFT is then calculated, requiring about one-half
    // > the time as an N point FFT. The resulting frequency domain is then
    // > separated by the even/odd decomposition, resulting in the frequency
    // > spectra of the two interlaced time domain signals.
    // > These two frequency spectra are then combined into a single spectrum,
    // > just as in the last synthesis stage of the FFT.


  DOBINI();

# if( SWI_DEBUG_2012_04_11 )

     // Only the first and the last input sample are checked here.
     // This early return happens BEFORE the critical section is entered
     // and before instance_count is incremented, so nothing must be undone.
     if( _isnan(pfltRe[0]) || _isnan(pfltRe[iNrOfPoints-1]) )
      { DEBUGGER_BREAK();  // "NAN" (Not A Number) .. set breakpoint here !
        return;      // 2012-04-11 : trapped here, but the reason was earlier
                     // (in CFftFilter::ProcessSamples) .
      }
# endif // SWI_DEBUG_2012_04_11 ?


#if( SWI_DEBUG_2012_04_11 ) // other attempts to find the reason for the NAN-bug..
  // NOTE(review): this lazy initialisation is itself not protected against
  //    two threads arriving here at the same time - confirm that this
  //    is acceptable for a debug-only build.
  if( ! critter_initialized )
   { InitializeCriticalSection( &myCS );
     critter_initialized = TRUE;
   }
  EnterCriticalSection( &myCS ); // TEST 2012-04-12: NAN was less frequent(?) but still occurred !
  // Record the FFT size and the thread-ID of up to four nested / concurrent calls :
  if( instance_count<4 )
   {  instance_npoints[instance_count] = iNrOfPoints;
    //instance_hThread[instance_count] = (long)GetCurrentThread();     // Utterly useless. Always '-2'.
      instance_thread_id[instance_count] = (long)GetCurrentThreadId(); // Truly unique thread-ID ??
      if( instance_count>0 )
       { // Can only get here WITHOUT the critical section..
         if( instance_thread_id[instance_count-1] == instance_thread_id[instance_count] )
          {  // Multiple calls FROM THE SAME THREAD-ID ? This cannot be true, but:
             instance_count = instance_count; // got here 2012-04-12 17:25 !  (no-op, only a place for a breakpoint)
             // instance_thread_id[0] == instance_thread_id[1] == SndThd_i32ThreadID !!
          }
       }
   }
  ++instance_count;
  if( instance_count>1 ) // just to confirm there ARE multiple instances,
   {  // interrupting each other because different threads use this function !
      instance_count = instance_count;  // 2012-04-11 : got here with instance_count=2  (no-op, breakpoint anchor)
      // The loop below is executed while myCS is still held. The matching
      // '--instance_count' is at the very end of this function.
      while( instance_count>1 )  // kludge to find out if multithreading caused the NAN-problem
       { Sleep(1);  // wait until the other thread returned from SndMat_CalcRealFft()
         // (this didn't work; once in this loop, it NEVER terminated. Why ?
         //  Got stuck here with instance_npoints[] = { 8192, 1024, 0, 0 } ,
         //                    instance_thread_id[] = { 4084, 3828, 0, 0 } .
         //  Most likely, a goddamned critical section prevented that the
         //  first called continued while the second caller was sleeping here.
         //  The second caller (with npoints=1024) was WinMain() -> ... Timer1Timer().
       }
      instance_count = instance_count;  // no-op, breakpoint anchor
   }
#endif

  // Separate even and odd points :
  // even-numbered samples become the real part, odd-numbered samples
  // the imaginary part of an N/2-point complex signal. This is done in
  // place; reading always happens at or ahead of the index being written,
  // so the loop must run with INCREASING index.
  NH = iNrOfPoints/2-1;
  for(I=0; I<=NH; ++I)
   { pfltRe[I] = pfltRe[2*I];
     pfltIm[I] = pfltRe[2*I+1];
   }

  DOBINI();

  // Calculate N/2 point FFT complex FFT
  SndMat_CalcComplexFft(
             iNrOfPoints / 2, // N =  number of points in the DFT
             pfltRe,          // real parts of input and output
             pfltIm );        // imaginary parts of input and output
  DOBINI();
# if( SWI_DEBUG_2012_04_11 )
     if( _isnan(pfltRe[0])  )
      { DEBUGGER_BREAK();  // "NAN" (Not A Number) .. set breakpoint here !
        // 2012-04-11 : got here with pfltRe[0..N] = -NAN ("minus Not-A-Number")
        // iNrOfPoints = 32768, caller = CFftFilter::ProcessSamples() .
        // The problem was later narrowed down in FftFilter.cpp .
      }
# endif // SWI_DEBUG_2012_04_11 ?


  // Even/odd frequency domain decomposition :
  // The lower halves [1..N/4-1] and their mirror partners [N/2-I] are
  // split up; one part stays in the lower half, the other is stored in
  // the upper half [N/2+..]. The order of the statements in the loop
  // matters, because the upper-half values must be calculated BEFORE
  // pfltRe[I], pfltIm[I] and their mirror partners are overwritten.
  NM1 = iNrOfPoints-1 ;
  ND2 = iNrOfPoints/2 ;
  N4  = iNrOfPoints/4-1;
  for(I=1; I<=N4; ++I)
   { IM = ND2-I;
     IP2 = I+ND2;
     IPM = IM+ND2;
     pfltRe[IP2] = (pfltIm[I] + pfltIm[IM]) * 0.5;
     pfltRe[IPM] =  pfltRe[IP2];
     pfltIm[IP2] = -(pfltRe[I] - pfltRe[IM]) * 0.5;
     pfltIm[IPM] = -pfltIm[IP2];
     pfltRe[I]   = (pfltRe[I] + pfltRe[IM]) * 0.5;
     pfltRe[IM]  =  pfltRe[I];
     pfltIm[I]   = (pfltIm[I] - pfltIm[IM]) * 0.5;
     pfltIm[IM]  = -pfltIm[I];
   }
  // The indices 0 and N/4 have no mirror partner in the loop above,
  // they are treated separately here :
  pfltRe[iNrOfPoints*3/4] = pfltIm[iNrOfPoints/4];
  pfltRe[ND2] = pfltIm[0];
  pfltIm[iNrOfPoints*3/4] = 0;
  pfltIm[ND2] = 0;
  pfltIm[iNrOfPoints/4] = 0;
  pfltIm[0]   = 0;

  DOBINI();

  // Complete the last FFT stage
  // L  = CINT(LOG(N)/LOG(2));
  LE = 0; I=iNrOfPoints; while(I>1){ ++LE; I=(I>>1); } // -> LE = log2( N )
  LE = 1<<LE;  // LE = CINT(2^LE);

  LE2= LE/2;
  UR = 1;      // UR + j*UI = rotation factor, starts at 1 + j*0
  UI = 0;
  SR =  cos(C_PI/(float)LE2); // only once per calculation.. no need for an array
  SI = -sin(C_PI/(float)LE2);
  for(J=1; J<=LE2; ++J)
   { JM1 = J-1;
     for( I=JM1; I<=NM1; I+=LE )
      { IP = I+LE2;    // second 'leg' of the butterfly, LE2 points above the first
        TR = pfltRe[IP]*UR - pfltIm[IP]*UI;
        TI = pfltRe[IP]*UI + pfltIm[IP]*UR;
        pfltRe[IP] = pfltRe[I]-TR;
        pfltIm[IP] = pfltIm[I]-TI;
        pfltRe[I]  = pfltRe[I]+TR;
        pfltIm[I]  = pfltIm[I]+TI;
      }
     // rotate the factor for the next J (complex multiplication by SR + j*SI) :
     TR = UR;
     UR = TR*SR - UI*SI;
     UI = TR*SI + UI*SR;
   } // NEXT J%

# if( SWI_DEBUG_2012_04_11 )
     if( _isnan(pfltRe[0])  )
      { DEBUGGER_BREAK();  // "NAN" (Not A Number) .. set breakpoint here !
        // 2012-04-11 : sometimes got here WITHOUT trapping further above
      }
# endif // SWI_DEBUG_2012_04_11 ?

# if( SWI_DEBUG_2012_04_11 )
  // Counterpart to '++instance_count' and EnterCriticalSection() further above.
  --instance_count;
  LeaveCriticalSection( &myCS );
# endif

#endif // NOT SWI_USE_KISS_FFT ?

#if( SWI_USE_KISS_FFT ) /* use Mark Borgerding's radix-N 'KISS' FFT ? */
  int i, nFreqBins;
  kiss_fftr_cfg kiss_FFTR_object; // black box created by kiss_fftr_alloc()
  kiss_fft_cpx  *complexFreqBins;
  // NOTE(review): the result of kiss_fftr_alloc() is not checked before it
  //    is passed to kiss_fftr() - confirm whether it can fail here.
  kiss_FFTR_object = kiss_fftr_alloc( iNrOfPoints, 0/*not inverse*/, NULL, NULL );
  nFreqBins = (iNrOfPoints/2) + 1;
  complexFreqBins = malloc( nFreqBins * sizeof(kiss_fft_cpx) );
  if( complexFreqBins != NULL )  // if the allocation fails, pfltRe[] and pfltIm[] remain unchanged
   {
     kiss_fftr( kiss_FFTR_object, pfltRe, complexFreqBins );
     // > input timedata has nfft scalar points
     // > output freqdata has nfft/2+1 complex points
     // To avoid having to re-write much of the application,
     // split and re-arrange the complex frequency bins
     // as specified in SndMat_CalcRealFft() :
     for(i=0; i<nFreqBins; ++i)
      { pfltRe[i] = complexFreqBins[i].r;
        pfltIm[i] = complexFreqBins[i].i;
      }
     free( complexFreqBins );
   }
  kiss_fftr_free( kiss_FFTR_object );
#endif // SWI_USE_KISS_FFT ?

  DOBINI();

} // end SndMat_CalcRealFft()


/***************************************************************************/
void SndMat_CalcRealFft_Double(  // double-precision counterpart of SndMat_CalcRealFft()
          int iNrOfPoints, // number of points in the time domain (input), 2^N
          double *pfltRe,   // the real input signal; overwritten with the real part of the result
          double *pfltIm )  // output: imaginary part of the result
 // FFT for REAL input signals, performed in place :
 //  1. even-numbered samples are moved into the real part, odd-numbered
 //     samples into the imaginary part of an N/2-point complex signal,
 //  2. SndMat_CalcComplexFft_Double() transforms these N/2 complex points,
 //  3. the result is split into two spectra (even/odd decomposition),
 //  4. one final butterfly stage combines both spectra.
 // Both arrays are accessed at indices up to iNrOfPoints-1 .
{
  int iBin, iMirror, iUpperBin, iUpperMirror;  // indices for steps 1 and 3
  int iTwiddle, iUpper, iLower;                // indices for step 4
  int iSpan, iHalfSpan;                        // butterfly geometry in step 4
  int iLast    = iNrOfPoints - 1;
  int nHalf    = iNrOfPoints / 2;
  int nQuarter = iNrOfPoints / 4;
  double dblRotRe, dblRotIm;     // current rotation factor (real, imaginary)
  double dblStepRe, dblStepIm;   // rotation increment
  double dblTmpRe, dblTmpIm;     // scratch values

  // Step 1 : de-interleave. Reading never happens behind the index
  //          being written, so the index MUST increase.
  for( iBin=0; iBin<nHalf; ++iBin )
   { pfltRe[iBin] = pfltRe[2*iBin];
     pfltIm[iBin] = pfltRe[2*iBin+1];
   }

  // Step 2 : N/2-point complex FFT
  SndMat_CalcComplexFft_Double( nHalf, pfltRe, pfltIm );

  // Step 3 : even/odd frequency domain decomposition.
  //          The upper-half values must be calculated BEFORE the
  //          lower-half sources are overwritten; do not reorder !
  for( iBin=1; iBin<nQuarter; ++iBin )
   { iMirror      = nHalf - iBin;
     iUpperBin    = iBin + nHalf;
     iUpperMirror = iMirror + nHalf;
     pfltRe[iUpperBin]    = (pfltIm[iBin] + pfltIm[iMirror]) * 0.5;
     pfltRe[iUpperMirror] =  pfltRe[iUpperBin];
     pfltIm[iUpperBin]    = -(pfltRe[iBin] - pfltRe[iMirror]) * 0.5;
     pfltIm[iUpperMirror] = -pfltIm[iUpperBin];
     pfltRe[iBin]         = (pfltRe[iBin] + pfltRe[iMirror]) * 0.5;
     pfltRe[iMirror]      =  pfltRe[iBin];
     pfltIm[iBin]         = (pfltIm[iBin] - pfltIm[iMirror]) * 0.5;
     pfltIm[iMirror]      = -pfltIm[iBin];
   }
  // Indices 0 and N/4 are not covered by the loop above :
  pfltRe[iNrOfPoints*3/4] = pfltIm[nQuarter];
  pfltRe[nHalf]           = pfltIm[0];
  pfltIm[iNrOfPoints*3/4] = 0;
  pfltIm[nHalf]           = 0;
  pfltIm[nQuarter]        = 0;
  pfltIm[0]               = 0;

  // Step 4 : complete the last FFT stage.
  //          iSpan = 2 ^ log2( N ), found by doubling once per right-shift of N
  iSpan = 1;
  for( iBin=iNrOfPoints; iBin>1; iBin>>=1 )
   { iSpan <<= 1;
   }
  iHalfSpan = iSpan / 2;
  dblRotRe  = 1;
  dblRotIm  = 0;
  dblStepRe =  cos(C_PI/(double)iHalfSpan); // only once per calculation.. no need for a table
  dblStepIm = -sin(C_PI/(double)iHalfSpan);
  for( iTwiddle=0; iTwiddle<iHalfSpan; ++iTwiddle )
   {
     for( iUpper=iTwiddle; iUpper<=iLast; iUpper+=iSpan )
      { iLower   = iUpper + iHalfSpan;
        dblTmpRe = pfltRe[iLower]*dblRotRe - pfltIm[iLower]*dblRotIm;
        dblTmpIm = pfltRe[iLower]*dblRotIm + pfltIm[iLower]*dblRotRe;
        pfltRe[iLower] = pfltRe[iUpper]-dblTmpRe;
        pfltIm[iLower] = pfltIm[iUpper]-dblTmpIm;
        pfltRe[iUpper] = pfltRe[iUpper]+dblTmpRe;
        pfltIm[iUpper] = pfltIm[iUpper]+dblTmpIm;
      }
     // rotate the factor for the next pass (complex multiplication) :
     dblTmpRe = dblRotRe;
     dblRotRe = dblTmpRe*dblStepRe - dblRotIm*dblStepIm;
     dblRotIm = dblTmpRe*dblStepIm + dblRotIm*dblStepRe;
   }

  DOBINI();

} // end SndMat_CalcRealFft_Double()


/***************************************************************************/
void SndMat_CalcRealInverseFft(
          int iNrOfPoints, // N  number of points in the IDFT (?!?) .. IN THE TIME DOMAIN
          float *pfltRe,   // REX[] = real parts of frequency domain, AND result
          float *pfltIm )  // IMX[] = imaginary parts of frequency domain
 //  INVERSE FFT FOR REAL SIGNALS  - inspired by [SGDSP] TABLE 12-6 .
 //  Upon entry, N contains the number of points in the IDFT ("time domain" ?) ,
 //  REX[ ] and IMX[ ] contain the real & imaginary parts of the frequency domain
 //  running from index 0 to N%/2.  The remaining samples in REX[] and IMX[]
 //  are ignored. Upon return, REX[ ] contains the real time domain, IMX[ ]
 //  contains zeros. (SET to zero, cannot be used to check the algorithm !)
 //  Both arrays must provide room for N elements.
 //
 //  Two implementations, selected at compile time by SWI_USE_KISS_FFT .
 //  Both deliver the time domain DIVIDED BY N, and IMX[] set to zero.
 //  If the KISS variant cannot allocate its working memory,
 //  REX[] and IMX[] are left unchanged.
{

#if( ! SWI_USE_KISS_FFT ) /* do NOT use Mark Borgerding's radix-N 'KISS' FFT ? */
  int K;
  float fltFactor;

  DOBINI();

  for(K=(iNrOfPoints/2+1); K<iNrOfPoints; ++K)  // Make frequency domain symmetrical
   { pfltRe[K] =  pfltRe[iNrOfPoints-K];        // (as in [SGDSP] Table 12-1)
     pfltIm[K] = -pfltIm[iNrOfPoints-K];
   }

  DOBINI();
  for(K=0; K<iNrOfPoints; ++K)       // Add real and imaginary parts together
   { pfltRe[K] =  pfltRe[K]+pfltIm[K];
   }

  DOBINI();
  // Calculate forward real DFT (TABLE 12-6, ex: "GOSUB 3000" )
  SndMat_CalcRealFft( // Calculate the REAL FFT ..
          iNrOfPoints, // N  number of points in the DFT (for example 1024 points)
          pfltRe,      // REX[] = the real input signal, also used as result
          pfltIm );    // IMX[] = output, imaginary part (for example 513(!) points)
     // 2008-08-15: Crashed in SndMat_CalcRealFft() with an access violation,
     //    after switching from the "default configuration" to
     //    "SDR-IQ with converter and audio filter" .  Reason: Multithreading ?
  DOBINI();

  // Add real and imaginary parts together and divide the time domain by N
  fltFactor = 1.0 / (float)iNrOfPoints;
  for(K=0; K<iNrOfPoints; ++K)  // see: iNrOfPoints are the number of samples IN THE TIME DOMAIN again !
   {
     pfltRe[K] = (pfltRe[K]+pfltIm[K]) * fltFactor;
     pfltIm[K] = 0; // set IMAGINARY part to zero for the sake of "mathematical correctness"
   }

#endif // NOT SWI_USE_KISS_FFT ?

#if( SWI_USE_KISS_FFT ) /* use Mark Borgerding's radix-N 'KISS' FFT ? */
  int i, nFreqBins;
  float fltFactor;
  kiss_fftr_cfg kiss_FFTR_object; // black box created by kiss_fftr_alloc()
  kiss_fft_cpx  *complexFreqBins;
  kiss_FFTR_object = kiss_fftr_alloc( iNrOfPoints, 1/*inverse*/, NULL, NULL );
  nFreqBins = (iNrOfPoints/2) + 1;
  complexFreqBins = malloc( nFreqBins * sizeof(kiss_fft_cpx) );
  // Only proceed if BOTH allocations succeeded: kiss_fftri() must not be
  // called without a valid configuration object.
  if( (kiss_FFTR_object != NULL) && (complexFreqBins != NULL) )
   {
     // To avoid having to re-write much of the application,
     // combine and re-arrange the complex frequency bins
     // into the format required by the KISS inverse real FFT (kiss_fftri) :
     for(i=0; i<nFreqBins; ++i)
      { complexFreqBins[i].r = pfltRe[i];
        complexFreqBins[i].i = pfltIm[i];
      }
     kiss_fftri( kiss_FFTR_object, complexFreqBins/*in*/, pfltRe/*out*/  );
     // > input  freqdata has nfft/2+1 complex points
     // > output timedata has nfft scalar points

     // The floating point version of KISS FFT does not scale its output,
     // so a forward transform followed by kiss_fftri() would deliver
     // the original signal multiplied by N.
     // Divide the time domain by N and clear the imaginary part here,
     // to deliver the same result as the implementation without KISS :
     fltFactor = 1.0 / (float)iNrOfPoints;
     for(i=0; i<iNrOfPoints; ++i)
      { pfltRe[i] = pfltRe[i] * fltFactor;
        pfltIm[i] = 0;
      }
   }
  free( complexFreqBins );   // harmless if the allocation failed (NULL)
  if( kiss_FFTR_object != NULL )
   { kiss_fftr_free( kiss_FFTR_object );
   }
#endif // SWI_USE_KISS_FFT ?

  DOBINI();

} // end SndMat_CalcRealInverseFft()


/***************************************************************************/
void SndMat_CalcRealInverseFft_Double(
          int iNrOfPoints, // [in] N = number of points IN THE TIME DOMAIN
          double *pfltRe,  // [in,out] REX[]: real parts of the spectrum; receives the time-domain result
          double *pfltIm)  // [in,out] IMX[]: imaginary parts of the spectrum; set to zero on return
 // Double-precision counterpart of SndMat_CalcRealInverseFft() .
 // Computes the inverse transform by means of the FORWARD real FFT,
 // following the recipe quoted from [SGDSP] (Table 12-1 / Table 12-6) .
 // Both arrays are indexed from 0 to iNrOfPoints-1 below,
 // so each of them must provide room for iNrOfPoints elements.
{
  const int nPoints = iNrOfPoints;
  double dblScale;
  int    iBin;

  DOBINI();

  // Step 1: build the upper half of the spectrum as the mirrored,
  //         conjugated copy of the lower half ("make it symmetrical") .
  iBin = nPoints/2 + 1;
  while( iBin < nPoints )
   { pfltRe[iBin] =  pfltRe[nPoints-iBin];
     pfltIm[iBin] = -pfltIm[nPoints-iBin];
     ++iBin;
   }

  DOBINI();
  // Step 2: fold the imaginary part into the real part, bin by bin.
  for( iBin=0; iBin<nPoints; ++iBin )
   { pfltRe[iBin] += pfltIm[iBin];
   }

  DOBINI();
  // Step 3: run the forward real DFT on the combined array
  //         (TABLE 12-6, ex: "GOSUB 3000") .
  SndMat_CalcRealFft_Double( nPoints, pfltRe, pfltIm );
     // 2008-08-15: Crashed in SndMat_CalcRealFft() with an access violation,
     //    after switching from the "default configuration" to
     //    "SDR-IQ with converter and audio filter" .  Reason: Multithreading ?
  DOBINI();

  // Step 4: add real and imaginary outputs, scale by 1/N,
  //         and clear the imaginary array (the result is purely real) .
  dblScale = 1.0 / (double)nPoints;
  for( iBin=0; iBin<nPoints; ++iBin )
   { pfltRe[iBin] = (pfltRe[iBin]+pfltIm[iBin]) * dblScale;
     pfltIm[iBin] = 0.0;
   }

  DOBINI();

} // end SndMat_CalcRealInverseFft_Double()


/***************************************************************************/
T_Float SndMat_MakeFftWindowTable(
           T_Float *pfltWindowTbl, // [out] window table, 32- or 64-bit floating point
           int   iFftSize,         // [in] number of points (usually 2^n)
           int   iWindowFunction)  // [in] FFT_WINDOW_HANN, etc etc
  // Builds a table with one of the usual FFT windowing functions,
  //  and returns the AVERAGE of that window (which is usually ~ 0.5),
  //  later used to normalize amplitudes.
  // Unknown values of iWindowFunction produce a rectangular window.
  // Degenerate input:
  //   - pfltWindowTbl==NULL or iFftSize<=0 : nothing is written, returns 0.0
  //     (formerly 0/0 = NaN for iFftSize==0) .
  //   - iFftSize==1 with a window that divides by (N-1) [Hann, Gauss] :
  //     the single table entry is 1.0 (formerly 0/0 = NaN) .
{
  int i;
  T_Float sum = 0.0;
  double  w;      // window value for the current index
  double  z;      // 2*pi*i / N, shared by all "sum of cosines" windows
  double  dbl;

  DOBINI();
  if( (pfltWindowTbl==NULL) || (iFftSize<=0) )
   { return 0.0;  // nothing to fill, and avoid dividing by zero further below
   }

  for(i=0; i<iFftSize; i++) // fill table with FFT WINDOW FUNCTION ..
   { // 'z' introduced 2014-05-02 to simplify the implementation
     //     of the 'new' flat-top windows, as used in [HFTWIN],
     //     appendix C, "List of window functions",
     //     same for MOST (but not all) window functions calculated
     //     as a sum of cosines.
     // Example: C.5 Hamming window
     //        2 * pi * i
     //   z = ------------  ,     where i = 0... N-1,   N=number of points (discrete window length)
     //           N
     // Note 1: Engineers don't use 'j' as a counting index, because  j * j := -1 .
     // Note 2: [HFTWIN] divides by N, not N-1; aka use as "periodic" window.
     //         Because in SL, these windows are applied to periodic short-term fourier transforms,
     //         dividing by N (i.e. "periodic" window) seems appropriate.
     //
     //   w[i] = 0.54 - 0.46 * cos(z)   [coefficients of the Hamming window]
     //
     z = (2.0 * C_PI * (double)i) / (double)iFftSize;
     //
     // The above 'z' applies to the following window functions as implemented in [HFTWIN] :
     //    Hamming, Blackman-Harris, all Nuttall windows, 'Salvatore' flat-top windows,
     //    'Old HP flat-top window', 'Stanford Research' flat-top window,
     //    and all 'New flat-top windows' by G. Heinzel (D.3) .
     // The above 'z' does definitely NOT apply to the following windows in [HFTWIN] :
     //    Kaiser,
     switch(iWindowFunction) // Pick a data windowing function:
      {
        case FFT_WINDOW_RECTANGLE:   // rectangle (bad but "fast reacting")
              w = 1.0;
              break;
        case FFT_WINDOW_HAMMING:     // Hamming
              // Not significantly better (compared to the Hann window) .
              // ex: From http://en.wikipedia.org/wiki/Window_function :
              // w = .53836 - .46164*cos( (2.0*C_PI*(float)i)/(float)(iFftSize-1));
              // From [HFTWIN] C.5 Hamming window:
              w = 0.54 - 0.46 * cos(z);
              break;
        case FFT_WINDOW_HANN:        // Hann (still a good tradeoff between frequency resolution and dynamic range!)
              // From http://en.wikipedia.org/wiki/Window_function :
              // > The Hann window is sometimes called the "Hanning" window,
              // > in analogy to the Hamming window. However, this is incorrect,
              // > because the windows were named after Julius von Hann
              // > and Richard Hamming, respectively.
              // Note: this one still uses the "symmetric" form (divide by N-1),
              //       so a one-point window needs special treatment.
              if( iFftSize < 2 )
               { w = 1.0;
               }
              else
               { w = .5 - .5*cos( (2.0*C_PI*(double)i)/(double)(iFftSize-1) );
               }
              break;
        case FFT_WINDOW_GAUSS:       // Gauss window
              // Also found at http://en.wikipedia.org/wiki/Window_function .
              // Divides by (N-1), too -> same special case for N==1 .
              if( iFftSize < 2 )
               { w = 1.0;
               }
              else
               { dbl = ( (double)i-(iFftSize-1)/2.0 ) / ( 0.4 *(iFftSize-1)/2.0 );
                 w = exp( -.5 * dbl * dbl );
               }
              break;
        case FFT_WINDOW_NUTTALL4B:  // Nuttall4b :
              // Low resolution but large dynamic range (low sidelobes, theor. 93 dB below main lobe).
              // See discussion / comparison with other (newer) Flat-Top windows in [HFTWIN] :
              // > The window called Nuttall4b is derived by requiring a SLDR of f^-3
              // > for a four-term function and using the remaining two degrees of freedom
              // > to minimize the PSLL (Peak SideLobe Level) .
              // > NENBW = 2.0212 bins  (Normalized Equivalent Noise BandWidth)
              // > PSLL  = -93.3 dB
              // > The first zero  is located  at  f  =  4.00  bins.
              // > The highest  sidelobe  is - 93.3 dB,  located  at f  =  4.57 bins.
              // > At the optimal overlap of 66.3%, the amplitude flatness is 0.924,
              // > the power flatness is 0.715, and the overlap correlation is 0.233 .
              //
              // Due to their almost 'flat top', this window (as similar other)
              // is utterly un-suited for frequency measurements using SL's interpolation !
              //
              // ex: w = .355768
              //       - .487396*cos( (2.0*C_PI*i)/(iFftSize-1) )
              //       + .144232*cos( (4.0*C_PI*i)/(iFftSize-1) )
              //       - .012604*cos( (6.0*C_PI*i)/(iFftSize-1) );
              w = .355768 /* "c0" */
                - .487396 /* "c1" */ * cos( 1.0/*"k"*/ * z )
                + .144232 /* "c2" */ * cos( 2.0/*"k"*/ * z )
                - .012604 /* "c3" */ * cos( 3.0/*"k"*/ * z );
              // In http://en.wikipedia.org/wiki/Window_function,
              //    the above window function is titled
              //  "Nuttall window, continuous first derivative"
              //    but the denominator (in "z") is N-1, not N  !
              break;

#if(0)  // old "flat top" window.  Which ? There are hundreds of flat-top windows out there,
        // and THIS ONE was one of the worst ever seen (compared to [HFTWIN]'s) ..
        case FFT_WINDOW_FLAT_TOP:    // Flat Top (for specialists too)
              // Low resolution, mediocre stopband attenuation,
              // but 'low passband ripple' for whatever it's worth.
              w = 1.0
                - 1.93*cos( (2.0*C_PI*i)/(iFftSize-1) )
                + 1.29*cos( (4.0*C_PI*i)/(iFftSize-1) )
                - 0.388*cos( (6.0*C_PI*i)/(iFftSize-1) )
                + 0.032*cos( (8.0*C_PI*i)/(iFftSize-1) );
              break;
#endif

        case FFT_WINDOW_FLATTOP5F: // "Fast decaying 5-term flat top window" (from [HFTWIN] D.1.3, "SFT5F")
              // See discussion / comparison with other ("newer") Flat-Top windows in [HFTWIN] :
              // > NENBW = 4.3412 bins
              // > PSLL  = -57.3 dB
              // > emax = 0.0025 dB = 0.0282 %.
              w = 0.1881   /* "c0" */
                - .36923   /* "c1" */ * cos( 1.0/*"k"*/ * z )
                + .28702   /* "c2" */ * cos( 2.0/*"k"*/ * z )
                - .13077   /* "c3" */ * cos( 3.0/*"k"*/ * z )
                + .02488   /* "c4" */ * cos( 4.0/*"k"*/ * z );
              break;

        case FFT_WINDOW_FLATTOP5M: // "Minimum sidelobe 5-term flat top window" (from [HFTWIN] D.1.6, "SFT5M")
              // See discussion / comparison with other ("newer") Flat-Top windows in [HFTWIN] :
              // > NENBW = 3.8852 bins
              // > PSLL  = -89.9 dB
              // > emax = 0.0039 dB = 0.0449 %.
              w = 0.209671 /* "c0" */
                - .407331  /* "c1" */ * cos( 1.0/*"k"*/ * z )
                + .281225  /* "c2" */ * cos( 2.0/*"k"*/ * z )
                - .092669  /* "c3" */ * cos( 3.0/*"k"*/ * z )
                + .0091036 /* "c4" */ * cos( 4.0/*"k"*/ * z );
              break;

        case FFT_WINDOW_HFT95: // "Heinzel Flat-Top -95 dB sidelobe" (from [HFTWIN] D.3.2, "HFT95")
              // > This window was optimized for the lowest sidelobe level
              // > that is achieveable with 4 cosine terms.
              // > NENBW = 3.8112 bins
              // > PSLL  = -95.0 dB
              // > emax = 0.0044 dB = 0.0507 %.
              w = 1.0 - 1.9383379*cos(z)     + 1.3045202*cos(2.0*z)
                      - 0.4028270*cos(3.0*z) + 0.0350665*cos(4.0*z);
              break;

        case FFT_WINDOW_HFT144D: // "Heinzel Flat-Top -144 dB sidelobe" (from [HFTWIN] D.3.5, "HFT144D")
              // > This window was optimized for the lowest sidelobe level
              // > that is achieveable with 6 cosine terms (..)
              // > NENBW = 4.5386 bins
              // > PSLL  = -144.1 dB  (highest sidelobe located at f +/- 7.07 bins)
              // > emax  = 0.0021 dB
              w = 1.0 - 1.96760033*cos(z)     + 1.57983607*cos(2.0*z)
                      - 0.81123644*cos(3.0*z) + 0.22583558*cos(4.0*z)
                      - 0.02773848*cos(5.0*z) + 0.00090360*cos(6.0*z);
              break;

        case FFT_WINDOW_HFT196D: // "Heinzel Flat-Top -196 dB sidelobe" (from [HFTWIN] D.3.7, "HFT196D")
              // > This window was optimized for the lowest sidelobe level
              // > that is achieveable with 8 cosine terms (..)
              // > NENBW = 5.1134 bins
              // > PSLL  = -196.2 dB  (highest sidelobe located at f +/- 9.06 bins)
              // > Optimal overlap = 82.3 %
              w = 1.0 - 1.979280420*cos(z)     + 1.710288951*cos(2.0*z)
                      - 1.081629853*cos(3.0*z) + 0.448734314*cos(4.0*z)
                      - 0.112376628*cos(5.0*z) + 0.015122992*cos(6.0*z)
                      - 0.000871252*cos(7.0*z) + 0.000011896*cos(8.0*z);
              break;

        case FFT_WINDOW_HFT248D: // "Heinzel Flat-Top -248 dB sidelobe" (from [HFTWIN] D.3.9, "HFT248D")
              // > This window was optimized for the lowest sidelobe level
              // > that is achieveable with 10 cosine terms (..)
              // > NENBW =  5.6512 bins
              // > PSLL  = -248.4 dB  (highest sidelobe located at f +/- 13.37 bins)
              // > Optimal overlap = 84.1 %
              // Exceeds the dynamic range of 32-bit floating point arithmetics,
              // but WB decided to keep this remarkable function for future projects
              // (which would use 'double precision' floating point numbers
              //  in the entire processing chain) .
              //
              w = 1.0 - 1.985844164102*cos(z)     + 1.791176438506*cos(2.0*z)
                      - 1.282075284005*cos(3.0*z) + 0.667777530266*cos(4.0*z)
                      - 0.240160796576*cos(5.0*z) + 0.056656381764*cos(6.0*z)
                      - 0.008134974479*cos(7.0*z) + 0.000624544650*cos(8.0*z)
                      - 0.000019808998*cos(9.0*z) + 0.000000132974*cos(10.0*z);
              break;

        default:  // unknown window type: treat like FFT_WINDOW_RECTANGLE
              w = 1.0;
              break;
      } // end switch( window_function )

     pfltWindowTbl[i] = (T_Float)w;
     sum += pfltWindowTbl[i];  // accumulate the STORED value, for the 'window weighting factor'
   }  // end for

  DOBINI();
  sum /= (T_Float)iFftSize;  // -> RECT:1.0,  HAMMING:0.54,  HANN:0.5
  return sum;  // returns the AVERAGE of the window function (later used to normalize amplitudes)
}

#if(0)    // Not used - for RESAMPLING we'll use LIBRESAMPLE now .
// NOTE(review): everything up to the matching #endif is excluded from the build.
//   As it stands, the code below could not be compiled anyway:
//    - it reads pReSampler->fltHistory, but T_SndMat_Resampler has no such member;
//    - 'pCoeff' and 'phase' are used but never declared
//      (presumably 'pfltCoeff' and 'fltPhase' were meant - to be confirmed);
//    - 'pfltCoeff = pReSampler->fltCoeffs[k]' assigns a float to a float pointer;
//    - the function is declared to return int, but has no return statement;
//    - the locally modified fltPhase / iDstIndex are never written back
//      to *pReSampler.
//   Kept for reference only.
typedef struct // T_SndMat_Resampler
{
  float fltPhase;        // fractional position, compared against fltInterpRatio below
  int   iDstIndex;       // next write index into the destination buffer
  float fltDecimRatio;   // decimation ratio
  float fltInterpRatio;  // interpolation ratio
  #define SNDMAT_MAX_RESAMPLER_COEFFS 256
  float fltCoeffs[SNDMAT_MAX_RESAMPLER_COEFFS];
  int   iCoeffLength;    // number of coefficients actually used
} T_SndMat_Resampler;

int SndMat_ReSample( T_SndMat_Resampler *pReSampler,
                      float *pfltSrc, int iSrcLen,
                      float *pfltDst, int iDstLen )
  // Sample rate conversion for ARBITRARY sample rates.
  // Required when input- and output sampling rate are not integer multiples.
  // Based on info found in SpectraVue (for the SDR-IQ device)
{ int i,i2,ci,k,h;
  float fltOutput;
  float *pfltCoeff;
  float fltPhase = pReSampler->fltPhase;
  int iDstIndex = pReSampler->iDstIndex;
  for(i=0; i<iSrcLen; ++i )
   {
     // emit output samples for as long as the phase stays below the interpolation ratio
     while( fltPhase < pReSampler->fltInterpRatio )
      {
        h=0;
        k=(int)fltPhase;
        fltPhase += pReSampler->fltDecimRatio;
        ci = pReSampler->iCoeffLength-1;
        pfltCoeff = pReSampler->fltCoeffs[k];
        fltOutput = 0.0;
        // multiply-accumulate over the history, coefficients taken in reverse order
        for( i2=1; i2<pReSampler->iCoeffLength; ++i2 )
         { fltOutput += ( pReSampler->fltHistory[h++] * pfltCoeff[ci--] );
         }
        fltOutput += (pfltSrc[i] * pCoeff[0]);
        pfltDst[iDstIndex++] = fltOutput;
        if( iDstIndex >= iDstLen )  // destination index wraps around to the start
            iDstIndex = 0;
      } // end while( fltPhase... )
     phase -= pReSampler->fltInterpRatio;

     // Update history array (non-circular FIFO)
     float *pfltHistory = pReSampler->fltHistory;
     for( int m=0; m<pReSampler->iCoeffLength-1; ++m)
      { pfltHistory[m] = pfltHistory[m+1];
      }
     pfltHistory[pReSampler->iCoeffLength-1] = pfltSrc[i];
   } // end  for(i... )
} // end SndMat_ReSampler()
#endif // (0)



/***************************************************************************/
void SndMat_InitFilter( T_FILTER_DATA *filter, BOOL all  )
  /* Resets a T_FILTER_DATA struct, either completely or only its state.
   * parameters:
   *   filter : struct to initialize; a NULL pointer is silently ignored.
   *   all    = TRUE : additionally clears max_coeff, iir_type, cascaded
   *                   and all alpha[] / beta[] coefficients.
   *   all    = FALSE: only clears the z[] registers and rewinds
   *                   the circular input queue pointer to &z[0] .
   * The arrays are cleared for indices 0..FILTER_MAX_COEFFICIENTS (inclusive).
   */
{
  int iCoeff;

  if( filter != NULL )
   {
     if( all )
      { // forget the filter's design, too
        filter->max_coeff = 0;
        filter->iir_type  = FALSE;
        filter->cascaded  = 0;    // 0=one big filter
      }

     for( iCoeff=FILTER_MAX_COEFFICIENTS; iCoeff>=0; --iCoeff )
      {
        if( all )
         { filter->alpha[iCoeff] = 0.0;
           filter->beta [iCoeff] = 0.0;
         }
        filter->z[iCoeff] = 0.0;  // storage register for every filter stage
      }

     filter->p_circular_input_queue = &filter->z[0];
   }
} // end SndMat_InitFilter()


/***************************************************************************/
void SndMat_RunThroughFilter(
          T_Float *input_samples,
          T_Float *output_samples,
          int    number_of_samples,
          T_FILTER_DATA *filter  )
    /* Let a block of samples run through a conventional digital filter
     * with convolution in the time domain (IIR or FIR, no "FFT Filter").
     * Used by the real-time audio processing thread
     * and by the digimode-decoder (for MARK and SPACE separation).
     * The filter coefficients and the filter's 'memories' are passed
     * in a pointer to a T_FILTER_DATA struct (see SoundThd.h) .
     * Note: It doesn't hurt if the input and output blocks are the same memory.
     *
     * Parameters:
     *   input_samples     [in]  number_of_samples values to be filtered
     *   output_samples    [out] receives number_of_samples filtered values
     *   number_of_samples [in]  block length
     *   filter            [in,out] coefficients (alpha[], beta[]) and state (z[],
     *                     p_circular_input_queue). Dereferenced without a NULL check.
     *                     filter->max_coeff is limited to FILTER_MAX_COEFFICIENTS
     *                     and the limited value is written back.
     *
     * The algorithm is selected by filter->iir_type and filter->cascaded :
     *   iir_type != 0, cascaded == 0 : one N-th order IIR filter
     *   iir_type != 0, cascaded == 3 : chain of 2nd-order IIR stages ("biquads")
     *   iir_type != 0, cascaded >= 2 : chain of stages with 'cascaded' coefficients each
     *   iir_type == 0                : FIR filter with a circular input queue in z[]
     * For any other combination (e.g. iir_type != 0 with cascaded == 1)
     * none of the branches is taken, and output_samples is left untouched.
     */
{
  INT jmax,stage,n_stages;
  INT    i,j,k;
  T_Float x,y,z;                 // NOTE(review): local 'z' is never used below
  T_FilterFloat *alpha, *beta;   // NOTE(review): local 'beta' is never used below
  T_FilterFloat *queue_in_ptr, *coeff_ptr, *queue_rd_ptr;


  // keep all coefficient / state indices used below within the limit
  if (filter->max_coeff > FILTER_MAX_COEFFICIENTS)
      filter->max_coeff = FILTER_MAX_COEFFICIENTS;

  if( (filter->iir_type) && (filter->cascaded==0) )
    {//------------------------------------------------------------------
     // algorithm for all non-cascaded IIR - filters  with <N>-th order:
     for(i=0; i<number_of_samples; ++i)
      {
       x = input_samples[i];  // filter input = "X"
       y = x * filter->alpha[0] + filter->z[0];
         // Optionally limit the output of the IIR filter
         // because it may oscillate !
         // Notice: Sometime just this LIMITER causes oscillation !!!!
         if (y>C_SND_MATH_FILTER_Y_MAX)
             {
               // ++SOUND_Filter_limit_y_overload_counter;
               y=C_SND_MATH_FILTER_Y_MAX;
             }
         if (y<-C_SND_MATH_FILTER_Y_MAX)
             {
               // ++SOUND_Filter_limit_y_overload_counter;
               y=-C_SND_MATH_FILTER_Y_MAX;
             }
         // update the state registers z[0]..z[max_coeff-2] from their upper neighbours
         for(j=1; j<filter->max_coeff; ++j)
          {
           filter->z[j-1]
             = x * filter->alpha[j]
                 + filter->z[j]
             - y * filter->beta[j];
          }
         // last register: no upper neighbour to add
         // (j is max_coeff here, or 1 if the loop above didn't run at all)
         filter->z[j-1] = x * filter->alpha[j]
                        - y * filter->beta[j];
         output_samples[i] = y;
      } // end for <all samples in the filter buffer>
     }
   else // no "non-cascaded" IIR filter:
   if( (filter->iir_type) && (filter->cascaded==3/*!*/ ) )
    {//------------------------------------------------------------------
     // algorithm for CASCADED SECOND ORDER  IIR - filters
     // consisting of 2nd(!) order filters (aka "biquads") in a chain.
     // Example: 5 stages *  2 nd-order IIR's in a chain:
     //          cascaded  = 3 (3 coefficients per stage)
     //          max_coeff = 14 (coefficient indices 0..14 are valid)
     n_stages = (filter->max_coeff+1) / 3;
     for(i=0; i<number_of_samples; ++i)
      {
         x = input_samples[i];  // filter input = "X"
         k = 0;                 // coefficient array index
         alpha = filter->alpha;  // (one pointer ref less for access in loop)
         for(stage=0; stage<n_stages; ++stage)
          {  // y0 = x*alpha0 + z0
           y = x * (*alpha++) + filter->z[k];   // k=0: no "beta" coefficient here !
           // note: only the POSITIVE limit is checked in this branch
           if (y>C_SND_MATH_FILTER_Y_MAX)
            { // avoid floating point errors (crash!) when filter oscillates:
              y=C_SND_MATH_FILTER_Y_MAX;
              // 2011-03-23: Got here with state=0, 1-kHz LP @ 192kS, y=1.04e20,
              //    filter->alpha[0..2] = 0.00411564, 0.008231,  0.00411564
              //    filter->beta [0..2] = 0.0       , 1.990531, -0.9905792
            }
           ++k;
           // z0 = x*alpha1 + z1 - y*beta1
           filter->z[k-1] = x * (*alpha++) + filter->z[k]
                          - y * filter->beta[k];
           ++k;
           // z1 = x*alpha2 - y*beta2
           filter->z[k-1] = x * (*alpha++)
                          - y * filter->beta[k];
           x = y; // output of this stage = input of next stage
           ++k;   // coefficient index for next stage of the cascade
          } // end for(stage... )
         // NOTE(review): if n_stages is 0, 'y' is stored without having been assigned
         output_samples[i] = y;
      } // end for <all samples in the filter buffer>
     }
    else // no cascaded design of SECOND order IIR filters:
   if( (filter->iir_type) && (filter->cascaded>=2) )
    {//------------------------------------------------------------------
     // algorithm for ANY CASCADED IIR - filters
     // consisting of 3rd or 4th order filters in a chain.
     // (slower than the above code for chained SECOND-ORDER IIR filters!)
     // Example: 5 stages *  2 nd-order IIR's in a chain:
     //          cascaded  = 3 (3 coefficients per stage)
     //          max_coeff = 14 (coefficient indices 0..14 are valid)
     n_stages = (filter->max_coeff+1)
               / filter->cascaded;
     jmax     =  filter->cascaded - 2/*!*/ ;   // usually: jmax=1
     for(i=0; i<number_of_samples; ++i)
      {
       x = input_samples[i];  // filter input = "X"
       k = 0;   // coefficient array index
       alpha = filter->alpha;  // (one pointer ref less for access in loop)
       for(stage=0; stage<n_stages; ++stage)
          {  // y0 = x*alpha0 + z0
           y = x * (*alpha++) + filter->z[k];   // k=0,3,.. no "beta" coefficient here !
#if(0)
           // limit the output of the IIR filter because it may oscillate !
           // Notice: Sometime just this LIMITER causes oscillation !!!!
           if (y>C_SND_MATH_FILTER_Y_MAX)
             { y=C_SND_MATH_FILTER_Y_MAX;
               ++SOUND_Filter_limit_y_overload_counter;
             }
           if (y<SOUND_Filter_limit_z_min)
             { y=SOUND_Filter_limit_z_min;
               ++SOUND_Filter_limit_y_overload_counter;
             }
#endif // removed to improve speed
           // inner registers of this stage (each one adds its upper neighbour)
           for(j=1; j<=jmax; ++j)
            {
             ++k;
             // z0 = x*alpha1 + z1 - y*beta1
             filter->z[k-1]
              = x * (*alpha++) + filter->z[k] - y * filter->beta[k];
            }
           ++k;
           // z1 = x*alpha2 - y*beta2
           filter->z[k-1] = x * (*alpha++) - y * filter->beta[k];  // k=2
           x = y; // output of this stage = input of next stage
           ++k;   // coefficient index for next stage of the cascade
          } // end for(stage... )
         // NOTE(review): if n_stages is 0, 'y' is stored without having been assigned
         output_samples[i] = y;
       } // end for <all samples in the filter buffer>
     }
    else // no IIR filter but FIR filter (no feedback, no "beta" coeffs)
    if (filter->iir_type==0)
     {//-----------------------------------------------------------------
      //  Test results for a 44th-order FIR lowpass on a 266MHz-P2 :
      //   old algorithm (without global adder):  4.6 us / sample
      //   new algorithm (with global adder):     2.6 us / sample
     // z[0]..z[max_coeff] serve as circular queue of the most recent input samples
     T_FilterFloat *input_queue_end = &filter->z[filter->max_coeff];
     // keep the circular buffer pointer 'valid' all the time :
     if(  (filter->p_circular_input_queue < &filter->z[0])
        ||(filter->p_circular_input_queue > input_queue_end) )
           filter->p_circular_input_queue = &filter->z[0];
     // use automatic copies of these variables to improve speed:
     queue_in_ptr = filter->p_circular_input_queue;
     for( i=0; i<number_of_samples; i++ )  // put new samples into Queue
      {
       // the queue is filled BACKWARDS, so reading forwards from queue_in_ptr
       // delivers the newest sample first
       if( --queue_in_ptr < &filter->z[0] )  // deal with wraparound
             queue_in_ptr = input_queue_end;
       *queue_in_ptr = input_samples[i];  // filter input = "X"
       queue_rd_ptr = queue_in_ptr;    // pointer to READ from input queue
       coeff_ptr = &filter->alpha[0];  // pointer to read from coefficient table
       y = 0.0;                  // clear the 'global adder'
       j = filter->max_coeff+1;  // j=45 for 44th-order filter (45 loops)
       while( j-- )         // do the MAC's
        {
         y += ( (*queue_rd_ptr++) * (*coeff_ptr++) );
         if( queue_rd_ptr > input_queue_end ) // deal with wraparound
             queue_rd_ptr = &filter->z[0];
        }
       output_samples[i] = y; // filter output = sum from all 'taps'
      }
     // save position in circular delay line :
     filter->p_circular_input_queue = queue_in_ptr;
     }  // end if <FIR filter>

} // end SndMat_RunThroughFilter()


//---------------------------------------------------------------------------
// ADPCM, used by OpenWebRX to compress audio and waterfall strips (spectra).
// > Copyright 1992 by Stichting Mathematisch Centrum, Amsterdam, The Netherlands.
// >                All Rights Reserved
// > Permission to use, copy, modify, and distribute this software and its
// > documentation for any purpose and without fee is hereby granted,
// > provided that the above copyright notice appear in all copies and that
// > both that copyright notice and this permission notice appear in
// > supporting documentation, and that the names of Stichting Mathematisch
// > Centrum or CWI not be used in advertising or publicity pertaining to
// > distribution of the software without specific, written prior permission.
// >
// > STICHTING MATHEMATISCH CENTRUM DISCLAIMS ALL WARRANTIES WITH REGARD TO
// > THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
// > FITNESS, IN NO EVENT SHALL STICHTING MATHEMATISCH CENTRUM BE LIABLE
// > FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// > WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// > ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
// > OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
// >
// > Intel/DVI ADPCM coder/decoder.
// >
// > The algorithm for this coder was taken from the IMA Compatability Project
// > proceedings, Vol 2, Number 2; May 1992.
// > IMA/DVI ADPCM is a standard that compresses 16-bit sound data into only 4-bits.
// > It is thought to be faster then Microsoft's ADPCM implementation.
// >  
//---------------------------------------------------------------------------


// Adjustment of the step-size index, looked up with the 4-bit delta code
// (bit 3 = sign, bits 2..0 = magnitude). Both halves are identical,
// i.e. the adjustment depends on the magnitude only.
const int ADPCM_iIndexTable[16] =
{
  // codes 0..7 : sign bit clear
  -1, -1, -1, -1,    // magnitude 0..3 : decrease the step size
   2,  4,  6,  8,    // magnitude 4..7 : increase the step size
  // codes 8..15 : sign bit set
  -1, -1, -1, -1,    // magnitude 0..3 : decrease the step size
   2,  4,  6,  8     // magnitude 4..7 : increase the step size
};

// Quantizer step sizes, selected by the codec's 'index' (valid range 0..88) .
const int ADPCM_iStepsizeTable[89] =
{
  /*  0.. 7 */      7,     8,     9,    10,    11,    12,    13,    14,
  /*  8..15 */     16,    17,    19,    21,    23,    25,    28,    31,
  /* 16..23 */     34,    37,    41,    45,    50,    55,    60,    66,
  /* 24..31 */     73,    80,    88,    97,   107,   118,   130,   143,
  /* 32..39 */    157,   173,   190,   209,   230,   253,   279,   307,
  /* 40..47 */    337,   371,   408,   449,   494,   544,   598,   658,
  /* 48..55 */    724,   796,   876,   963,  1060,  1166,  1282,  1411,
  /* 56..63 */   1552,  1707,  1878,  2066,  2272,  2499,  2749,  3024,
  /* 64..71 */   3327,  3660,  4026,  4428,  4871,  5358,  5894,  6484,
  /* 72..79 */   7132,  7845,  8630,  9493, 10442, 11487, 12635, 13899,
  /* 80..87 */  15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794,
  /* 88     */  32767
};

void ADPCM_Init( T_ADPCM_Codec *pCodec )
  // Puts the codec into its start condition by zeroing the entire struct.
{
  memset( pCodec, 0, sizeof *pCodec );
}


int ADPCM_Decode( T_ADPCM_Codec *pCodec, BYTE deltaCode )
   // Decodes ONE 4-bit ADPCM code and updates the codec state.
   //  [in,out] pCodec    : codec state ('prevValue' = last decoded sample,
   //                       'index' = position in ADPCM_iStepsizeTable)
   //  [in]     deltaCode : bit 3 = sign, bits 2..0 = magnitude.
   //                       Only the lower four bits are evaluated.
   //  Returns the new decoded sample, limited to -32768 ... +32767 .
   // Also called by ADPCM_Encode() to keep the encoder's state
   // in step with what a decoder will reconstruct.
{
   int step, difference;
   int code = deltaCode & 0x0F;  // never index the 16-entry table with anything larger

   // Same precaution as in ADPCM_Encode(): an uninitialized or corrupted
   // state must not lead to a table access out of bounds.
   if( pCodec->index < 0 )
    {  pCodec->index = 0;
    }
   else
   if( pCodec->index > 88 )
    {  pCodec->index = 88;
    }

   // Get the current step size
   step = ADPCM_iStepsizeTable[pCodec->index];

   // Construct the difference by scaling the current step size
   // This is approximately: difference = (code+.5)*step/4
   difference = step>>3;
   if ( code & 1 ) difference += step>>2;
   if ( code & 2 ) difference += step>>1;
   if ( code & 4 ) difference += step;
   if ( code & 8 ) difference = -difference;

   // Build the new sample, saturated to the 16-bit range
   pCodec->prevValue += difference;
   if( pCodec->prevValue > 32767)
    {  pCodec->prevValue = 32767;
    }
   else
   if( pCodec->prevValue < -32768)
    {  pCodec->prevValue = -32768;
    }

   // Update the step for the next sample
   pCodec->index += ADPCM_iIndexTable[code];
   if( pCodec->index < 0)
    {  pCodec->index = 0;
    }
   else
   if( pCodec->index > 88)
    {  pCodec->index = 88;
    }

   return pCodec->prevValue;
} // end ADPCM_Decode()

BYTE ADPCM_Encode( T_ADPCM_Codec *pCodec, short sample )
   // Encodes ONE 16-bit sample into a 4-bit ADPCM code
   // (bit 3 = sign, bits 2..0 = magnitude in units of step/4)
   // and advances the codec state by decoding that code again.
   // The caller usually packs two of these codes into one BYTE :
   // the k-th sample goes into bits 3..0, the k+1-th sample into bits 7..4
   // (forget about the "bufferstep"-flag seen in the 1992 implementation) .
{
   int iResidual, iStep, iCode, iBit;

   // Paranoia: keep the table index valid ("should never happen",
   // but the table access below often crashed with an access violation).
   if( pCodec->index < 0 )
    {  pCodec->index = 0;  // <- set breakpoint HERE !
    }
   if( pCodec->index > 88 )
    {  pCodec->index = 88; // <- set breakpoint HERE, too !
    }

   iStep     = ADPCM_iStepsizeTable[pCodec->index];
   iResidual = sample - pCodec->prevValue;  // difference to the previous (decoded) value

   // Split the difference into sign (the "8" in the code) and magnitude
   iCode = 0;
   if( iResidual < 0 )
    { iCode     = 8;
      iResidual = -iResidual;
    }

   // Successive approximation, essentially  code = (diff<<2)/step,
   // except the roundoff is handled differently:
   // test against step, step/2, step/4 for the bits 4, 2, 1 .
   for( iBit=4; iBit>0; iBit>>=1 )
    { if( iResidual >= iStep )
       { iCode     |= iBit;
         iResidual -= iStep;
       }
      iStep >>= 1;
    }

   ADPCM_Decode( pCodec, iCode );  // update state

   return (BYTE)iCode; // FOUR significant bits
} // end ADPCM_Encode()

static short ADPCM_SaturateToInt16( int iValue )
   // Limits a sample to the 16-bit range expected by ADPCM_Encode() .
{
  if( iValue >  32767 ) return  32767;
  if( iValue < -32768 ) return -32768;
  return (short)iValue;
}

void ADPCM_EncodeBlock( T_ADPCM_Codec *pCodec, int *piInput, BYTE *pbOutput, int nSamples )
   // Encodes a block of samples, two samples per output byte:
   //   sample 2*k   -> bits 3..0 of pbOutput[k],
   //   sample 2*k+1 -> bits 7..4 of pbOutput[k] .
   // Because ADPCM stores TWO samples in a BYTE, the input length must always
   // be a multiple of two: nSamples/2 bytes are written, and with an odd
   // nSamples the last input sample is NOT encoded.
   // Input values outside -32768..32767 are saturated before encoding;
   // formerly they were narrowed to 'short' implicitly, which typically
   // wraps around and turns an overload into a sample of opposite sign.
{
  int iPair;
  int nPairs = nSamples / 2;
  for( iPair=0; iPair<nPairs; ++iPair )
   {
     short sFirst  = ADPCM_SaturateToInt16( piInput[2*iPair]   );
     short sSecond = ADPCM_SaturateToInt16( piInput[2*iPair+1] );
     // the order of the two calls matters: each one advances the codec state
     pbOutput[iPair]  = ADPCM_Encode( pCodec, sFirst );
     pbOutput[iPair] |= ADPCM_Encode( pCodec, sSecond )<<4;
   }
}

void ADPCM_DecodeBlock( T_ADPCM_Codec *pCodec, BYTE *pbInput, int *piOutput, int input_length )
   // Decodes input_length bytes into 2*input_length samples.
   // Per byte, the low nibble (bits 3..0) is decoded first,
   // followed by the high nibble (bits 7..4) .
{
  int   nBytesLeft;
  BYTE *pbSrc = pbInput;
  int  *piDst = piOutput;
  for( nBytesLeft=input_length; nBytesLeft>0; --nBytesLeft )
   {
     *piDst++ = ADPCM_Decode( pCodec, (*pbSrc)      & 0x0F );
     *piDst++ = ADPCM_Decode( pCodec, ((*pbSrc)>>4) & 0x0F );
     ++pbSrc;
   }
}



/* EOF < \cbproj\SoundUtl\SoundMaths.c > */
