// File:  C:\cbproj\Remote_CW_Keyer\CwDSP.c
// Date:  2023-11-22
// Author:  Wolfgang Buescher (DL4YHF)
// Purpose: 'CW Digital Signal Processor' - initially just a sidetone generator,
//          later also a DSP with sample rate conversions and compression
//                for low-bandwidth audio streaming via network,
//                audio spectrum analyser, and -on top of that-
//                audio CW decoder.
//


#include "switches.h" // project specific 'compilation switches' like SWI_USE_DSOUND

#include <string.h>   // no string functions used here, but possibly memset()
#ifndef _WINDOWS      // obviously compiling for a PC, so ..
# include "math.h"    // generate our lookup-tables at runtime in RAM, instead of using a 'const table in ROM'
#endif

#include "Timers.h"   // high-resolution 'current time'-function, T_TIM_Stopwatch(), etc.
#include "Elbug.h"    // header for the basic 'Elbug' functions (plain C)
#include "FFT_API.h"  // Fast Fourier Transformations ("complex","real","inverse")
#include "SoundTab.h" // cosine lookup table, filter coefficients, T_Float, etc.
#include "CwDSP.h"    // header for THIS module ('CW Digitial Signal Processor')


//----------------------------------------------------------------------------
// Global variables for hardcore debugging / post-mortem crash analysis,
//        when the debugger's call stack shows nothing but garbage,
//        e.g. after an exception at the infamous address 0xFEEEFEEE .
//        ( C++Builder's debugger could still inspect SIMPLE GLOBAL
//          VARIABLES after most kinds of exceptions, but of course nothing
//          stack-based because the CPU- or task-stack was usually trashed.)
//----------------------------------------------------------------------------
#if(SWI_HARDCORE_DEBUGGING) // (1) = hardcore-debugging, (0)=normal compilation
 // Source line most recently recorded by HERE_I_AM__DSP(). Deliberately a
 // simple global variable, so it can be inspected even when the call stack
 // is unusable.
 int DSP_iLastSourceLine = 0;  // WATCH THIS after crashing with e.g. "0xFEEEFEEE" !
 // HERE_I_AM__DSP() : stores the current source line in DSP_iLastSourceLine.
# define HERE_I_AM__DSP() DSP_iLastSourceLine=__LINE__
     // (see complete list of other XYZ_iLastSourceLine variables to watch
     //  in C:\cbproj\Remote_CW_Keyer\Keyer_Main.cpp, near GUI_iLastSourceLine)
#else
 // Normal compilation: the marker macro expands to nothing.
# define HERE_I_AM__DSP()
#endif // SWI_HARDCORE_DEBUGGING ?


//----------------------------------------------------------------------------
// "Internal function prototypes" (functions called before their implementation)
//----------------------------------------------------------------------------

// Worker thread entry point; passed to CreateThread() in CwDSP_Start().
static DWORD WINAPI DspThread( LPVOID lpParam );
// Converts an audio frequency in Hz into the integer phase step used to
// index the cosine lookup table (implemented further below).
static int CwDSP_FrequencyToPhaseIncrement( int iAudioFrequency_Hz );
#if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
 // Enter / leave the critical section (csAudioIO) that brackets the calls
 // into the DirectSound wrapper (DSW_xyz) in this module.
 static void CwDSP_EnterCriticalSection_AudioIO( T_CwDSP *pCwDSP );
 static void CwDSP_LeaveCriticalSection_AudioIO( T_CwDSP *pCwDSP );
#endif
// Audio CW decoder helpers; implemented outside the part of the file
// that declares them here.
static void CwDSP_ProcessComplexAudioSpectrumForCwDecoder( T_CwDSP *pCwDSP,
                      float *fltFFTRe, float *fltFFTIm, int nFrequencyBins,
                      double dblTimestamp_s );
static void CwDSP_ProcessAudioSpectrumForDecoder( T_CwDSP *pCwDSP, int iDecoder, double dblTimestamp_s );


// implementation of functions:


//----------------------------------------------------------------------------
void CwDSP_InitInstanceWithDefaults( T_CwDSP *pCwDSP )
  // Fills a DSP instance with default settings.
  // Meant to be called ONCE by the main application (or "GUI"), before the
  // actual configuration is loaded from a file; whatever this function
  // leaves in *pCwDSP are the "defaults".
  //  [in,out] pCwDSP : instance to initialize. Must not be NULL.
  // A repeated call is tolerated: the struct is only wiped (and the critical
  // section only created) while dwInitMagic does not yet hold DSP_INIT_MAGIC.
{
  int iDecoder;

  // First call for this instance ? Then start from an all-zero struct.
  // On later calls do NOT memset(), because that would destroy the
  // critical section that was created during the first call.
  if( pCwDSP->dwInitMagic != DSP_INIT_MAGIC )
   {
     memset( pCwDSP, 0, sizeof(T_CwDSP) );
#   if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
     InitializeCriticalSection( &pCwDSP->csAudioIO );
#   endif
     pCwDSP->dwInitMagic = DSP_INIT_MAGIC;
   }

  // Sidetone generator defaults
  pCwDSP->iSidetonePhaseAccu       = 0;
  pCwDSP->cfg.iSidetoneFreq_Hz     = 650;
  pCwDSP->cfg.iSidetoneRiseTime_ms = 4;   // Icom offered 2, 4, 6, or 8 milliseconds

  // Audio flags and CW decoder defaults.
  // DSP_AUDIO_FLAGS_DECODE_CW stays *off* per default, because it's mostly
  // annoying to let a MACHINE decode Morse Code (but sometimes it helps
  // if the other OP stubbornly ignores "pse QRS").
  pCwDSP->cfg.iAudioFlags          = DSP_AUDIO_FLAGS_ALLOW_NETWORK_AUDIO;
  pCwDSP->cfg.iCwDecoderDotTime_ms = 48;  // 48 ms per dot = 25 Words Per Minute

  // Sample rate converters. Using SRConv_InitDecimator() for the
  // interpolators is not a typo: the same struct type serves for
  // decimation (downsampling) as well as for interpolation (upsampling).
  // NOTE(review): the original remark said "first decimate by two, then by
  //   three; first interpolate by three, then by two", which does not
  //   obviously match the index order below - confirm which array index
  //   is the first stage in the processing chain.
  SRConv_InitDecimator( &pCwDSP->Decimators[0],    3/*decimation_ratio*/ );
  SRConv_InitDecimator( &pCwDSP->Decimators[1],    2/*decimation_ratio*/ );
  SRConv_InitDecimator( &pCwDSP->Interpolators[0], 2/*upsampling_ratio*/ );
  SRConv_InitDecimator( &pCwDSP->Interpolators[1], 3/*upsampling_ratio*/ );

  // Audio spectrum analyser: clear the buffers, build the Hann window,
  // and reset the counters (no audio FFT has been calculated yet).
  FFT_ClearFloatArray(  pCwDSP->fltFFTRe,         CWDSP_AUDIO_SPECTRUM_FFT_LENGTH );
  FFT_ClearFloatArray(  pCwDSP->fltFFTIm,         CWDSP_AUDIO_SPECTRUM_FFT_LENGTH );
  FFT_BuildWindowTable( pCwDSP->fltFFTWindow,     CWDSP_AUDIO_SPECTRUM_FFT_LENGTH, FFT_WINDOW_HANN );
  FFT_ClearFloatArray(  pCwDSP->fltFFTInputQueue, CWDSP_AUDIO_SPECTRUM_FFT_LENGTH );
  pCwDSP->dwFFTCounter = 0;
  pCwDSP->iFFTInputQueueHeadIndex = 0;  // index of the next sample entered in fltFFTInputQueue[]
  FFT_ClearFloatArray(  pCwDSP->fltAudioPowerSpectrum, CWDSP_AUDIO_SPECTRUM_NUM_FREQUENCY_BINS );

  // Frequency bin width = sampling rate / FFT length, e.g. 8 kHz / 128 = 62.5 Hz
  pCwDSP->fltAudioSpectrumBinWidth_Hz =
        (float)CWDSP_INPUT_FIFO_SAMPLING_RATE / (float)CWDSP_AUDIO_SPECTRUM_FFT_LENGTH;
  // Spectrum frame rate: twice the bin width, due to the 50 % overlap between FFTs
  pCwDSP->fltAudioSpectrumFrameRate = 2.0 * pCwDSP->fltAudioSpectrumBinWidth_Hz;
  // Interval between two samples in the plotter FIFO, in SECONDS PER SAMPLE
  // (for timestamp calculations in e.g. CwDSP_ReadFromPlotterFifo() )
  pCwDSP->dblPlotterSamplingInterval_s = 1.0 / pCwDSP->fltAudioSpectrumFrameRate;

  // Sample FIFOs; the second argument is the sample interval in seconds/sample.
  CwDSP_InitSampleFifo( &pCwDSP->sInputFifo,     1.0 / CWDSP_INPUT_FIFO_SAMPLING_RATE );
  CwDSP_InitSampleFifo( &pCwDSP->sOutputFifo,    1.0 / CWDSP_OUTPUT_FIFO_SAMPLING_RATE );
  CwDSP_InitSampleFifo( &pCwDSP->sNetworkRxFifo, 1.0 / CWDSP_NETWORK_FIFO_SAMPLING_RATE );

  // All audio CW decoders start with the configured dot time.
  for( iDecoder=0; iDecoder<CWDSP_NUM_AUDIO_CW_DECODERS; iDecoder++ )
   { StraightKeyDecoder_Init( &pCwDSP->AudioCwDecoder[iDecoder].MorseDecoder,
                              pCwDSP->cfg.iCwDecoderDotTime_ms );
   }

} // end CwDSP_InitInstanceWithDefaults()


//---------------------------------------------------------------------------
float CwDSP_DecibelToVoltageGainFactor( int iGain_dB )
{ return pow( 10.0, (float)iGain_dB / 20.0 );
} // end CwDSP_DecibelToVoltageGainFactor()

//---------------------------------------------------------------------
void CwDSP_QuickSort_Float( float *pfltArray, int iLo, int iHi )
  // Quick Sort for increasing order, in an array of 'float' (in place).
  // Used in the Audio CW Decoder for a statistical analysis of the
  //  peak power history to find the noise level and the "key down" level
  //  - see CwDSP_ProcessAudioSpectrumForDecoder() .
  //  [in,out] pfltArray : array to sort; a NULL pointer is ignored.
  //  [in]     iLo, iHi  : first and last index (both INCLUSIVE) of the range
  //                       to sort. Ranges with fewer than two elements
  //                       (iHi <= iLo, including the "empty" case iHi = iLo-1)
  //                       are left untouched and are not even read.
  // Only the SMALLER partition is sorted by a recursive call, the larger one
  // is handled by the loop. This limits the recursion depth to about
  // log2(number of elements), which matters on small thread stacks.
{
  int    lo, hi;
  float  fltPivot, fltSwap;

  if( pfltArray == NULL )
   { return;
   }

  while( iLo < iHi )  // at least two elements left in the current range ?
   {
     lo = iLo;
     hi = iHi;
     fltPivot = pfltArray[ lo + (hi-lo)/2 ];  // middle element is the pivot

     // Partition: afterwards nothing in [iLo..hi] is larger than the pivot,
     // and nothing in [lo..iHi] is smaller.
     do
      {
        while( (lo < iHi) && (pfltArray[lo] < fltPivot) )
         { ++lo;   // the index limit is a safeguard only
         }
        while( (hi > iLo) && (pfltArray[hi] > fltPivot) )
         { --hi;
         }
        if( lo <= hi )
         { fltSwap       = pfltArray[lo];
           pfltArray[lo] = pfltArray[hi];
           pfltArray[hi] = fltSwap;
           ++lo;
           --hi;
         }
      }
     while( lo <= hi );

     // Recurse into the smaller part, continue the loop with the larger one.
     if( (hi - iLo) < (iHi - lo) )
      { if( hi > iLo )
         { CwDSP_QuickSort_Float( pfltArray, iLo, hi );
         }
        iLo = lo;   // remaining work: [lo..iHi]
      }
     else
      { if( lo < iHi )
         { CwDSP_QuickSort_Float( pfltArray, lo, iHi );
         }
        iHi = hi;   // remaining work: [iLo..hi]
      }
   }
} // end CwDSP_QuickSort_Float()



//---------------------------------------------------------------------------
void CwDSP_Start( T_CwDSP *pCwDSP )                                    // API
  // Tries to open all configured audio input and -output devices,
  //       and calls whatever the OS'es 'audio API' requires
  //       to read audio input, and produce audio output with our 'DSP'.
  //  [in]  pCwDSP->cfg.sz255AudioOutputDevice, pCwDSP->cfg.sz255AudioInputDevice :
  //           device names; an empty string or "None" (compared without
  //           regard to case) means "don't open this direction".
  //  [in]  pCwDSP->pDSW : DirectSound wrapper instance (may be NULL).
  //  [out] pCwDSP->sz255LastError : set when opening a device, initializing
  //           a buffer, starting a stream, or creating the thread failed.
  //  [out] pCwDSP->hThread, dwThreadId, iThreadStatus : worker thread state.
  // The function has no return value; failures are only reported through
  // sz255LastError and iThreadStatus.
  // NOTE(review): the error texts are copied with an unbounded strcpy();
  //    this relies on both buffers having the same size - confirm.
{
  // Begin from a defined state: terminate a running worker thread
  // and close the audio devices first.
  CwDSP_Stop(pCwDSP); // <- doesn't do harm if not previously STARTED

#if( SWI_USE_DSOUND ) // use "DirectSound" / dsound_wrapper.c ?
  // If not done yet, but configured, open DirectSound devices for in- and/or output.
  //  [in]  pCwDSP->sz255AudioOutputDevice
  //  [out] pCwDSP->pDSW (our 'DirectSound' wrapper instance)
  if( (pCwDSP->pDSW != NULL ) && (pCwDSP->dwInitMagic==DSP_INIT_MAGIC) )
   {
#   if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
     CwDSP_EnterCriticalSection_AudioIO( pCwDSP ); // prevent other threads from calling DSW_xyz() now
#   endif
     DSW_CloseInput(  pCwDSP->pDSW ); // <- doesn't do harm if NOT opened at all, but don't close while DspThread() runs.
     DSW_CloseOutput( pCwDSP->pDSW ); // <- doesn't do harm if NOT opened at all, but don't close while DspThread() runs.
#   if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
     CwDSP_LeaveCriticalSection_AudioIO( pCwDSP );
#   endif
     // --- Audio OUTPUT: open the device, then init the buffer, then start. ---
     // Each step is only tried when the previous one succeeded.
     if(   (pCwDSP->cfg.sz255AudioOutputDevice[0] != '\0')
        && (stricmp( pCwDSP->cfg.sz255AudioOutputDevice, "None") != 0 ) )
      { // want audio output via DirectSound -> kick it alive ..
#      if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
        CwDSP_EnterCriticalSection_AudioIO( pCwDSP ); // prevent other threads from calling DSW_xyz()
#      endif
        if( ! DSW_OpenOutputDevice( pCwDSP->pDSW, pCwDSP->cfg.sz255AudioOutputDevice ) )
         { strcpy( pCwDSP->sz255LastError, pCwDSP->pDSW->sz255LastOutputError );
         }
        else // successfully opened the output device, ready for the next step:
        if( ! DSW_InitOutputBuffer( pCwDSP->pDSW, CWDSP_SAMPLING_RATE/*nFrameRate*/,
             1/*nChannels*/, CWDSP_OUTPUT_BUFFER_NSAMPLES*sizeof(short)/*bufSize*/ ) )
         { strcpy( pCwDSP->sz255LastError, pCwDSP->pDSW->sz255LastOutputError );
         }
        else // successfully initialized the output buffer (circular FIFO),
         { // ready for the next step (see DirectSound API summary in dsound_wrapper.c):
           if( ! DSW_StartOutput( pCwDSP->pDSW ) )
            { strcpy( pCwDSP->sz255LastError, pCwDSP->pDSW->sz255LastOutputError );
            }
           // If all works as planned, the rest happens in CwDSP_UpdateSidetone(),
           //                          periodically called from KeyerThread() .
         } // end if < successfully initialized a DirectSound OUTPUT BUFFER >
#      if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
        CwDSP_LeaveCriticalSection_AudioIO( pCwDSP ); // allow others to call DSW_xyz() again
#      endif
      }   // end if < want audio OUTPUT via 'Direct Sound' ? >
     // --- Audio INPUT: same three steps as for the output. ---
     // NOTE(review): all error branches below copy sz255LastOutputError,
     //    although the failing calls are INPUT functions. This looks like a
     //    copy-and-paste leftover; check whether the wrapper offers a
     //    separate error string for the input side.
     if(   (pCwDSP->cfg.sz255AudioInputDevice[0] != '\0')
        && (stricmp( pCwDSP->cfg.sz255AudioInputDevice, "None") != 0 ) )
      { // want audio INPUT via DirectSound ->
#      if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
        CwDSP_EnterCriticalSection_AudioIO( pCwDSP ); // prevent other threads from calling DSW_xyz()
#      endif
        if( ! DSW_OpenInputDevice( pCwDSP->pDSW, pCwDSP->cfg.sz255AudioInputDevice ) )
         { strcpy( pCwDSP->sz255LastError, pCwDSP->pDSW->sz255LastOutputError );
         }
        else // successfully opened the input device, ready for the next step:
        if( ! DSW_InitInputBuffer( pCwDSP->pDSW, CWDSP_SAMPLING_RATE/*nFrameRate*/, 1/*nChannels*/, CWDSP_INPUT_BUFFER_NSAMPLES*sizeof(short)/*bufSize*/ ) )
         { strcpy( pCwDSP->sz255LastError, pCwDSP->pDSW->sz255LastOutputError );
         }
        else // successfully initialized the input buffer (circular FIFO),
         { // ready for the next step (see DirectSound API summary in dsound_wrapper.c):
           if( ! DSW_StartInput( pCwDSP->pDSW ) )
            { strcpy( pCwDSP->sz255LastError, pCwDSP->pDSW->sz255LastOutputError );
            }
           // If all works as planned, the rest happens in CwDSP_ProcessAudioInput(),
           //                       periodically called from ... not sure from where.
         } // end if < successfully initialized a DirectSound INPUT BUFFER >
#      if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
        CwDSP_LeaveCriticalSection_AudioIO( pCwDSP ); // allow others to call DSW_xyz() again
#      endif        
      }   // end if < want audio INPUT via 'Direct Sound' ? >
   }     // end if( pCwDSP->pDSW != NULL )
#endif // SWI_USE_DSOUND ?

  //
  // Create the worker thread (again, using ancient WIN32 API functions only,
  //                           no "dot net" framework, no VCL, no Qt, etc)
  // The status is set to 'LAUNCHED' BEFORE the thread is created, because
  // the thread runs immediately after its creation (dwCreationFlags = 0).
  if( pCwDSP->hThread==NULL )
   {  pCwDSP->iThreadStatus = DSP_THREAD_STATUS_LAUNCHED;
      pCwDSP->hThread = CreateThread(
         NULL,    // LPSECURITY_ATTRIBUTES lpThreadAttributes = pointer to thread security attributes
         65536,   // DWORD dwStackSize  = initial thread stack size, in bytes
         DspThread, // LPTHREAD_START_ROUTINE lpStartAddress = pointer to thread function
         (LPVOID*)pCwDSP,    // LPVOID lpParameter = argument for new thread
         0,       // DWORD dwCreationFlags = creation flags
                  // zero -> the thread runs immediately after creation
         &pCwDSP->dwThreadId // LPDWORD lpThreadId = pointer to returned thread id
       );
      // > The thread object remains in the system until the thread has terminated
      // > and all handles to it have been closed through a call to CloseHandle.
      // NOTE(review): the cast above is written as (LPVOID*) but the parameter
      //    is an LPVOID; this converts implicitly, a plain (LPVOID) was
      //    probably intended.
   }
  if( pCwDSP->hThread==NULL ) // Check the return value for success.
   {
     strcpy( pCwDSP->sz255LastError, "CreateThread failed." );
     pCwDSP->iThreadStatus = DSP_THREAD_STATUS_NOT_CREATED;
     // Don't bail out without a worker thread. At least the SIDETONE GENERATOR
     // doesn't need the 'DSP Thread', because it's called every 2 milliseconds(!)
     // from the KEYER THREAD ... don't ask why.
   }
  else
   { // > Define the Thread's priority as required.
     SetThreadPriority( pCwDSP->hThread, // handle to the thread
                THREAD_PRIORITY_NORMAL); // thread priority level
   }

} // end CwDSP_Start()


//---------------------------------------------------------------------------
void CwDSP_Stop( T_CwDSP *pCwDSP )                                     // API
  // Counterpart of CwDSP_Start(): asks the worker thread (if any) to
  // terminate, closes the thread handle, and closes the audio devices.
  // Harmless when called without a previous CwDSP_Start().
  //  [in,out] pCwDSP->iThreadStatus : DSP_THREAD_STATUS_NOT_CREATED on return
  //  [in,out] pCwDSP->hThread       : NULL on return
  // May block for up to 20 * Sleep(10) while waiting for the thread.
{
  int  nPollsLeft;
  BOOL fThreadActive;

  // Is there a worker thread that must be asked to terminate ?
  fThreadActive = (pCwDSP->iThreadStatus == DSP_THREAD_STATUS_LAUNCHED )
               || (pCwDSP->iThreadStatus == DSP_THREAD_STATUS_RUNNING  );
  if( fThreadActive )
   { // Politely ask the thread to terminate itself, then poll its status.
     pCwDSP->iThreadStatus = DSP_THREAD_STATUS_TERMINATE;
     nPollsLeft = 20;
     while( nPollsLeft > 0 )
      { Sleep(10);
        if( pCwDSP->iThreadStatus == DSP_THREAD_STATUS_TERMINATED )
         { break;  // the thread has terminated itself, no need to wait any longer
         }
        --nPollsLeft;
        // If the thread is unable to TERMINATE ITSELF (and we get stuck here),
        // watch DSP_iLastSourceLine (in the debugger). Sometimes, the thread
        // got stuck in the call to DSW_ReadBlock(), so also
        // watch DSW_iLastSourceLine (DSW=DirectSound Wrapper; dsound_wrapper.c).
        // 2024-01-06 : Indeed DSPThread() died in DSW_ReadBlock(), and
        //              DSW_ReadBlock() died in IDirectSoundCaptureBuffer_Unlock()
        //              for reasons yet to be discovered (compile with
        //              SWI_HARDCORE_DEBUGGING to watch those variables).
      }
     // NOTE(review): when the polling above times out, the code below still
     //    closes the handle and marks the thread as 'not created', without
     //    knowing whether the thread has really ended.
   }

  // Release our handle to the thread object (if there is one).
  if( pCwDSP->hThread != NULL )
   { CloseHandle( pCwDSP->hThread );
     pCwDSP->hThread = NULL;
   }
  pCwDSP->iThreadStatus = DSP_THREAD_STATUS_NOT_CREATED;

#if( SWI_USE_DSOUND ) // use "DirectSound" / dsound_wrapper.c ?
  // Close the DirectSound input- and output device
  // (this doesn't do harm if they were not opened at all).
  //  [in] pCwDSP->pDSW (our 'DirectSound' wrapper instance)
  if( (pCwDSP->pDSW != NULL) && (pCwDSP->dwInitMagic==DSP_INIT_MAGIC) )
   {
#   if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
     CwDSP_EnterCriticalSection_AudioIO( pCwDSP );
#   endif
     DSW_CloseInput(  pCwDSP->pDSW );
     DSW_CloseOutput( pCwDSP->pDSW );
#   if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
     CwDSP_LeaveCriticalSection_AudioIO( pCwDSP );
#   endif
   }
#endif // SWI_USE_DSOUND ?

} // end CwDSP_Stop()

#if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
//---------------------------------------------------------------------------
static void CwDSP_EnterCriticalSection_AudioIO( T_CwDSP *pCwDSP ) // INTERN !
  // Enters the critical section that guards the audio I/O (csAudioIO).
  // Between this call and the following CwDSP_LeaveCriticalSection_AudioIO(),
  // the calling thread may call the DSW_xyz()-functions in dsound_wrapper.c .
  // THE TIME THE CALLER KEEPS THE CRITICAL SECTION OCCUPIED MUST BE
  // KEPT AS SHORT AS POSSIBLE - no waiting, no sleeping, no processing
  // of audio samples, etc between ..Enter.. and ..Leave.. !
{
  if( pCwDSP->dwInitMagic != DSP_INIT_MAGIC )
   { // Instance not initialized, thus no InitializeCriticalSection() yet
     // [shouldn't happen, but..] -> do nothing, to avoid crashing.
     return;
   }
  EnterCriticalSection( &pCwDSP->csAudioIO );
} // end CwDSP_EnterCriticalSection_AudioIO()

//---------------------------------------------------------------------------
static void CwDSP_LeaveCriticalSection_AudioIO( T_CwDSP *pCwDSP ) // INTERN !
  // Leaves the critical section that guards the audio I/O (csAudioIO).
  // Like the 'Enter' function, this does nothing for an instance
  // whose dwInitMagic doesn't hold DSP_INIT_MAGIC.
{
  if( pCwDSP->dwInitMagic != DSP_INIT_MAGIC )
   { return;
   }
  LeaveCriticalSection( &pCwDSP->csAudioIO );
} // end CwDSP_LeaveCriticalSection_AudioIO()
#endif // SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO ?


//---------------------------------------------------------------------------
void CwDSP_SwitchOutputState( T_CwDSP *pCwDSP,
    int iNewOutputState) // [in] DSP_OUTPUT_STATE_.. (off, sidetone, RX-audio)
                         // [out] pCwDSP->iOutputState
  // Switches the use of the local audio output (off / sidetone / RX-audio).
  // Without a DirectSound wrapper instance, or without an opened output
  // device, the state is forced to DSP_OUTPUT_STATE_OFF, regardless of
  // what the caller asked for.
  // Note: DO NOT SLEEP HERE ! The caller may be the KEYER THREAD,
  //       which cannot even afford being blocked for more than a few milliseconds !
{
  // Can we fulfil the caller's wish at all ?
  if( (pCwDSP->pDSW == NULL) || ( ! pCwDSP->pDSW->fOutputOpened ) )
   { pCwDSP->iOutputState = DSP_OUTPUT_STATE_OFF; // the output can only be *OFF*
     return;
   }

  // The DSP can principally 'generate an audio output' (locally).
  // Nothing to do if the requested state is already the current one.
  if( iNewOutputState == pCwDSP->iOutputState )
   { return;
   }

  // STATE TRANSITION: store the new state. The per-state actions
  // below are placeholders (all of them are currently empty).
  pCwDSP->iOutputState = iNewOutputState;
  switch( iNewOutputState )
   { case DSP_OUTPUT_STATE_OFF :              // start outputting 'silence'
        break;
     case DSP_OUTPUT_STATE_GENERATE_SIDETONE: // start generating a LOW-LATENCY SIDETONE (locally)
        break;
     case DSP_OUTPUT_STATE_RECEIVER_AUDIO:    // start outputting the RECEIVER'S audio signal (more buffering and latency)
        break;
   } // end switch( iNewOutputState )

} // end CwDSP_SwitchOutputState()

//---------------------------------------------------------------------------
void CwDSP_UpdateSidetone( T_CwDSP *pCwDSP, BOOL fKeyDown )
  // Generates the next short block of sidetone samples (cosine lookup table,
  // shaped by a linear rising / falling ramp) and writes it to the audio output.
  // Called from the fast-running "PLC-like" KEYER THREAD
  // (see KeyerThread.c : KeyerThread() ), possibly as fast as every
  // 2 milliseconds. Must not block, must not wait for other threads,
  // must not call any 'slow' OS functions, and must not interfere with the
  // audio samples that are POSSIBLY being sent to the output in DspThread()
  // [with a higher output latency] !
  //
  //  [in]  fKeyDown : "Morse keying signal" for the sidetone.
  //                   TRUE = tone on, FALSE = tone off.
  //  [in]  pCwDSP->cfg.iSidetoneFreq_Hz, .iSidetoneGain_dB, .iSidetoneRiseTime_ms
  //  [in,out] pCwDSP->iSidetonePhaseAccu, pCwDSP->fltSidetoneRamp :
  //                   oscillator phase and envelope, carried over between calls
  //  [out] pCwDSP->pszLastError : POINTER to the wrapper's error text
  //                   if DSW_WriteBlock() failed
  // Does nothing without a DirectSound wrapper instance with an opened output.
  // The output state is switched to "sidetone" on key-down here;
  // it is never switched back inside this function.
{
  long nBytesEmpty;
  short i16Samples[CWDSP_SIDETONE_BUFFER_NSAMPLES]; // Generate up to <CWDSP_SIDETONE_BUFFER_NSAMPLES> audio samples per call.
        // Note: The DirectSound output buffer must be large enough
        //    for AT LEAST 10 milliseconds of audio (don't ask why);
        //    but CWDSP_SIDETONE_BUFFER_NSAMPLES only needs to provide
        //    2 milliseconds of audio, plus some headroom because the
        //    calling interval of CwDSP_UpdateSidetone() isn't too predictable.
  int nSamples, nSamplesOccupied, iSample;
  int iSidetonePhaseInc;
  float fltSample, fltFactor, fltSidetoneRampInc;

  if( (pCwDSP->pDSW != NULL) && (pCwDSP->pDSW->fOutputOpened) ) // Can we produce 'audio output' at all ?
   { // Automatically 'change over' between SIDETONE OUTPUT
     //  and passing through from RECEIVER (or network) to the AUDIO OUTPUT:
     if( pCwDSP->iOutputState != DSP_OUTPUT_STATE_GENERATE_SIDETONE )
      { // Audio output currently NOT used by the sidetone generator -> Switch now ?
        if( fKeyDown ) // yes, switch to "sidetone output" on "key down"
         { CwDSP_SwitchOutputState( pCwDSP, DSP_OUTPUT_STATE_GENERATE_SIDETONE );
         }
      } // end if( pCwDSP->iOutputState != DSP_OUTPUT_STATE_GENERATE_SIDETONE )

     if( pCwDSP->iOutputState == DSP_OUTPUT_STATE_GENERATE_SIDETONE ) // audio output currently occupied by the SIDETONE ?
      { // How many BYTES (not *sample points*) can we stuff into DirectSound's OUTPUT buffer ?
        // (to keep the latency as low as possible, don't fill the output buffer COMPLETELY.
        //  Only fill in as many samples as we expect until THE NEXT CALL,
        //  which -when made from KeyerThread()- happens every two milliseconds)
#      if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
        CwDSP_EnterCriticalSection_AudioIO( pCwDSP ); // prevent other threads
        // from interfering between DSW_QueryOutputSpace() and DSW_WriteBlock().
#      endif // SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO ?
        // Generating the samples only takes a few microseconds, so it's ok...
        if( DSW_QueryOutputSpace( pCwDSP->pDSW, &nBytesEmpty ) )
         { // Ideally, when entering CwDSP_UpdateSidetone(),
           // approximately <CWDSP_SIDETONE_BUFFER_NSAMPLES> should still
           // remain in the DirectSound buffer, to avoid output buffer underruns.
           // But Direct Sound seems to 'drain' its output buffer only once in 10 ms.
           //  Example:  CWDSP_OUTPUT_BUFFER_NSAMPLES   = 2048  ~~ 42 milliseconds
           //            CWDSP_SIDETONE_BUFFER_NSAMPLES = 1024  ~~ 21 milliseconds
           //            CWDSP_SAMPLING_RATE            = 48000
           // When hitting the Morse key for the first time, the output buffer
           // may be completely empty, and a breakpoint below should fire
           // with nBytesEmpty = CWDSP_OUTPUT_BUFFER_NSAMPLES ...
           //   but it did NOT; the largest value that DSW_QueryOutputSpace()
           //   ever returned was HALF of the buffer size. Strange.
           //
           // To keep the audio latency down, even though the buffer is
           // LARGE, only emit APPROXIMATELY as many samples as the
           // calling interval (from KeyerThread() to CwDSP_UpdateSidetone(),
           // synchronized by the "Multimedia"-timer, set to 2 milliseconds).
           nSamples/*empty*/ = nBytesEmpty / sizeof(short); // MAXIMUM number of
                       // samples to emit (but usually MUCH MORE than necessary)
           nSamplesOccupied  = CWDSP_OUTPUT_BUFFER_NSAMPLES/2 - nSamples;
           //  ,---------------|_____________________________|
           //  '--> Kludge because DSW_QueryOutputSpace() NEVER reported
           //       an empty or almost empty buffer, even when
           //       DSW_WriteBlock() wasn't called at all. Got to clean up
           //       this mess one fine day; but for now, it works reliably.)
           #define L_WANTED_BUFFER_USAGE_MS 12 // If over <L_WANTED_BUFFER_USAGE_MS> of audio are already buffered ...
           if( nSamplesOccupied > (CWDSP_SAMPLING_RATE/(1000/L_WANTED_BUFFER_USAGE_MS)) )
            { if( nSamples > (CWDSP_SAMPLING_RATE/520) ) // .. then emit "slightly less than 2 ms"
               {  nSamples = (CWDSP_SAMPLING_RATE/520);  // emit e.g. 48000/520 = 92 samples = ~~1.92 milliseconds
               }
            }
           else // less than 12 ms samples already in the DirectSound output buffer ->
            { if( nSamples > (CWDSP_SAMPLING_RATE/480) ) // emit "slightly MORE than 2 ms" ...
               {  nSamples = (CWDSP_SAMPLING_RATE/480);  // e.g. 48000/480 = 100 samples = ~~2.08 milliseconds
               }
              // The result is a kind of control loop to keep the DirectSound
              // output buffer JUST BELOW the point of a buffer *underrun*.
              // This gives the lowest latency that the "new" 10-millisecond
              // READ INTERVAL permits. If audio stutters or seems to "hum",
              // allow a few MORE milliseconds in the buffer.
              // When all worked as planned, the "report" on the 'Debug' tab showed:
              //  >  SidetoneSamples: 92 92 100 100 92 92 92 100 100 100 92 92 100 100 92 100
              // (which means ON AVERAGE, 'short' and 'slightly longer' sample blocks
              //  are equally distributed)
            }
           // Safety net: never generate more samples than the local buffer
           // i16Samples[] can hold. With the example values above (100 versus
           // 1024 samples) this never applies, but it prevents writing beyond
           // the array if the sampling rate or the buffer size are changed.
           if( nSamples > (int)CWDSP_SIDETONE_BUFFER_NSAMPLES )
            {  nSamples = (int)CWDSP_SIDETONE_BUFFER_NSAMPLES;
            }
           if( nSamples >= (CWDSP_SAMPLING_RATE/1000) ) // only calculate new waveform when at least 1ms can be emitted
            {
              // Store the number of audio samples sent per 2-ms 'KeyerThread'
              //  in a short FIFO for debugging (this can be examined on the 'Debug'-tab):
              pCwDSP->i16SidetoneSamplesPerUpdate[ pCwDSP->dwSidetoneUpdates & 15 ] = nSamples;
              ++pCwDSP->dwSidetoneUpdates;
              fltFactor = CwDSP_DecibelToVoltageGainFactor( pCwDSP->cfg.iSidetoneGain_dB );
              fltFactor *= 3276.7; /* 32767 to scale into 16-bit integer "minus 20 dB" */
              iSidetonePhaseInc = CwDSP_FrequencyToPhaseIncrement( pCwDSP->cfg.iSidetoneFreq_Hz );
              if( pCwDSP->cfg.iSidetoneRiseTime_ms > 0 )  // use a "nicely shaped" sidetone ?
               { fltSidetoneRampInc = (1000.0/CWDSP_SAMPLING_RATE) / (pCwDSP->cfg.iSidetoneRiseTime_ms);
                 // Example: CWDSP_SAMPLING_RATE = 48000 [Hz],
                 //    iSidetoneRiseTime_ms = 4 [ms] -> 48 kHz * 4 ms = 192 samples
                 //    during which pCwDSP->fltSidetoneRamp must rise from 0 to 1.0
                 // -> fltSidetoneRampInc = (1000 / 48000) / 4 = circa 0.005 .
               }
              else // no ramp but "hard keying" (clickety-click, who cares about bandwidth..)
               { fltSidetoneRampInc = 32767;  // far more than 1.0: the ramp saturates within one sample
               }
              for( iSample=0; iSample<nSamples; ++iSample )
               {
                 // "Ramp up" or "Ramp down" ?
                 if( fKeyDown ) // "ramp up" (until reaching the sidetone's full amplitude)
                  { pCwDSP->fltSidetoneRamp += fltSidetoneRampInc;
                    if( pCwDSP->fltSidetoneRamp > 1.0 )
                     {  pCwDSP->fltSidetoneRamp = 1.0;
                     }
                  }
                 else // "ramp down" (until arriving at The Sound of Silence)
                  { pCwDSP->fltSidetoneRamp -= fltSidetoneRampInc;
                    if( pCwDSP->fltSidetoneRamp < 0.0 )
                     {  pCwDSP->fltSidetoneRamp = 0.0;
                     }
                  }
                 // gain * envelope * cosine, clipped to the 16-bit range:
                 fltSample = fltFactor * pCwDSP->fltSidetoneRamp * SoundTab_fltCosTable[ pCwDSP->iSidetonePhaseAccu ];
                 if( fltSample > 32767.0 )
                  {  fltSample = 32767.0;
                  }
                 if( fltSample < -32767.0 )
                  {  fltSample = -32767.0;
                  }
                 i16Samples[iSample] = (short)fltSample;
                 // Advance the oscillator's phase; the bitwise AND lets it
                 // wrap around at the end of the cosine table.
                 pCwDSP->iSidetonePhaseAccu += iSidetonePhaseInc;
                 pCwDSP->iSidetonePhaseAccu &= (SOUND_COS_TABLE_LEN-1);
               }
              if( ! DSW_WriteBlock( pCwDSP->pDSW, (char *)i16Samples, nSamples*sizeof(short) ) ) // here: Called from  KeyerThread() -> CwDSP_UpdateSidetone()
               { // Don't STRING-COPY the error message, just copy a POINTER here:
                 pCwDSP->pszLastError = pCwDSP->pDSW->sz255LastOutputError;
               } // end if < DSW_WriteBlock() failed >
            }   // end if < enough space to 'generate' a few milliseconds of CW sidetone >
         }     // end if < Mr. DirectSound-wrapper successfully queried the OUTPUT BUFFER SPACE >
#      if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
        CwDSP_LeaveCriticalSection_AudioIO( pCwDSP ); // allow others to call DirectSound again
#      endif
      } // end if < iOutputState == DSP_OUTPUT_STATE_GENERATE_SIDETONE > ?
   }   // end if < got a valid pointer to a DirectSound wrapper instance, *AND* "open for output" > ?
}     // end CwDSP_UpdateSidetone()


//---------------------------------------------------------------------------
static int CwDSP_FrequencyToPhaseIncrement( int iAudioFrequency_Hz )
  // Converts an audio frequency into the integer 'phase increment'
  //        (table index step per output sample) for the cosine lookup table,
  //        SoundTab_fltCosTable[SOUND_COS_TABLE_LEN] (in SoundTab.cpp) .
  //  [in]  iAudioFrequency_Hz : frequency in Hertz
  //  Returns  frequency * table length / sampling rate, rounded by adding
  //        half of the divisor before the integer division.
  // Integer-only, because this module was originally written with a
  //        microcontroller firmware in mind where we can't afford to call
  //        floating point functions like sin(), cos(), etc - especially not
  //        from a fast-running timer interrupt that directly calculates
  //        audio samples for a DAC.
{
  // At typical "audio frequencies" (like 650 Hz for the CW sidetone),
  //    we get along with this "rounding integer calculation".
  // Examples, e.g. with SOUND_COS_TABLE_LEN = 32768, CWDSP_SAMPLING_RATE = 48000 :
  //  iAudioFrequency_Hz = 0 (off) -> phase increment = 0
  //  iAudioFrequency_Hz = 650 Hz  -> (650 * 32768 + 24000) / 48000 = 444
  //  iAudioFrequency_Hz = 640 Hz  -> (640 * 32768 + 24000) / 48000 = 437
  // The product easily fits inside a 32-bit int, but wouldn't fit in a
  //      16-bit integer. 12- or 16-bit 'PIC' microcontroller firmware
  //      developers beware.
  int iPhaseInc = ( (CWDSP_SAMPLING_RATE/2) + iAudioFrequency_Hz * SOUND_COS_TABLE_LEN )
                  / CWDSP_SAMPLING_RATE;
  return iPhaseInc;
} // end CwDSP_FrequencyToPhaseIncrement()


//---------------------------------------------------------------------------
void CwDSP_ShortToFloat( short *pi16In, float *pfltOut, int nSamples, float fltFactor )
  // Converts a block of 16-bit integer samples into floating point,
  // multiplying each sample with a scaling factor.
  //  [in]  pi16In    : source samples (16-bit signed integer)
  //  [out] pfltOut   : destination, must provide room for nSamples values
  //  [in]  nSamples  : number of samples; nothing happens if zero or negative
  //  [in]  fltFactor : usually 1.0 / 32767.0 to "normalize" the 16-bit input
  //                    to -1.0 .. +1.0 in the floating point output.
{
  int iSample;
  for( iSample=0; iSample<nSamples; ++iSample )
   { pfltOut[iSample] = (float)pi16In[iSample] * fltFactor;
   }
} // end CwDSP_ShortToFloat()

//---------------------------------------------------------------------------
void CwDSP_FloatToShort( float *pfltIn, short *pi16Out, int nSamples, float fltFactor )
  // Converts a block of floating point samples into 16-bit signed integers,
  // with scaling and clipping. The fractional part is truncated.
  //  [in]  pfltIn    : source samples (floating point)
  //  [out] pi16Out   : destination, must provide room for nSamples values
  //  [in]  nSamples  : number of samples; nothing happens if zero or negative
  //  [in]  fltFactor : usually 32767.0 to "de-normalize" the floating-point
  //                    (ranging from -1.0 to +1.0) into 16-bit signed integer output.
  // Scaled values are limited to -32767 .. +32767; "Not a Number" is
  // converted into 0 (silence), because converting a NaN into an integer
  // has no defined result in C.
{
  float flt;
  while( (nSamples--) > 0 )
   { flt = (*pfltIn++) * fltFactor;
     if( flt != flt )      // only true for NaN, which would slip through both limits below
      {  flt = 0.0f;
      }
     if( flt > 32767.0f )  // limit (clip) BEFORE converting to integer
      {  flt = 32767.0f;
      }
     if( flt < -32767.0f ) // limit (clip) BEFORE converting to integer
      {  flt = -32767.0f;  // Note that we deliberately omit -32768 here.
         // That pattern (0x8000 as 'short int') may be used for framing, etc.
      }
     *pi16Out++ = (short)flt;
   }
} // end CwDSP_FloatToShort()

//---------------------------------------------------------------------------
void CwDSP_InitSampleFifo( T_CwDSP_Fifo *pFifo, double dblSecondsPerSample )
  // Empties a sample FIFO (head == tail == 0), stamps the head with the
  // current high-resolution time, and stores the sampling interval.
  // [in,out] pFifo               : FIFO to initialize
  // [in]     dblSecondsPerSample : sampling interval, stored for later use
  //                                in timestamp calculations .
{
  pFifo->iTail = 0;
  pFifo->head.index  = 0;
  pFifo->head.index2 = 0; // keep BOTH copies of the head index equal. Otherwise,
     // after re-initialising a FIFO that had been in use, the consistency check
     // in CwDSP_ReadFifoHeadIndexAndTimestamp() (index == index2) would fail
     // until the next call of CwDSP_WriteToFifo() .
  pFifo->head.dblTimestamp_s = DSW_ReadHighResTimestamp_s();
  pFifo->dblSecondsPerSample = dblSecondsPerSample;
} // end CwDSP_InitSampleFifo()

//---------------------------------------------------------------------------
int CwDSP_GetNumSamplesInFifoForTailIndex( T_CwDSP_Fifo *pFifo, int iTail )
  // Returns a MOMENTARY SNAPSHOT of the number of samples between the
  // caller's own tail index and the FIFO's head index.
  // Intended for MULTIPLE READERS: each reader keeps its own 'tail index',
  // while there is always only ONE WRITER / one HEAD index.
  // [in] pFifo : FIFO to examine (only the head index is read here)
  // [in] iTail : the calling reader's tail index
{
  int nAvailable = pFifo->head.index - iTail;
  // A negative difference means the head index has already wrapped around
  // the end of the circular buffer while the tail index has not:
  return ( nAvailable >= 0 ) ? nAvailable
                             : ( nAvailable + CWDSP_PROCESS_FIFO_NSAMPLES );
} // end CwDSP_GetNumSamplesInFifoForTailIndex()

//---------------------------------------------------------------------------
int CwDSP_GetNumSamplesInFifo( T_CwDSP_Fifo *pFifo ) // .. using the primary head- and tail index
  // Returns a MOMENTARY SNAPSHOT of the number of samples waiting in the FIFO,
  // seen from the FIFO's own (primary) tail index, pFifo->iTail .
{
  int iPrimaryTail = pFifo->iTail;
  return CwDSP_GetNumSamplesInFifoForTailIndex( pFifo, iPrimaryTail );
} // end CwDSP_GetNumSamplesInFifo()

//---------------------------------------------------------------------------
int CwDSP_GetFreeSpaceInFifo( T_CwDSP_Fifo *pFifo )
  // Returns a MOMENTARY SNAPSHOT of the number of samples that may still be
  // WRITTEN into this FIFO before it overflows (seen from the primary tail index).
{
  // This "classic, lock-free, circular FIFO" can store ONE SAMPLE LESS
  // than the fixed queue size (in sample points), because :
  //      head index == tail index                 ->    FIFO is empty
  //      (head index + 1), wrapped == tail index  -> FIFO is COMPLETELY full
  const int nUsableCapacity = CWDSP_PROCESS_FIFO_NSAMPLES - 1;
  const int nOccupied       = CwDSP_GetNumSamplesInFifo( pFifo );
  return nUsableCapacity - nOccupied;
} // end CwDSP_GetFreeSpaceInFifo()

//----------------------------------------------------------------------------
void CwDSP_ReadFifoHeadIndexAndTimestamp( T_CwDSP_Fifo *pFifo, T_CWDSP_FifoHead *pHead)
  // Tries to capture the FIFO's head index and the timestamp for the head index
  // "simultaneously", without costly interrupt locks or thread synchronisation.
  // Assumes the READER (and thus the caller of this function) has a lower
  // thread- or interrupt priority than the WRITER ..
  //     see CircularFifo.c, with a similar kludge .
  // [in]  pFifo : FIFO whose 'head' struct shall be captured
  // [out] pHead : receives the copy of pFifo->head . After ten unsuccessful
  //               attempts, the LAST copy is delivered even if index != index2
  //               (the timestamp may be "slightly off" then, but still usable).
{
  int nAttempts = 0; // counts UP from zero. (The former implementation started
                     // at ten and counted up, so "nRetries<10" was never true
                     // and the loop below never repeated.)
  do // may have to repeat this when "interrupted" - see CwDSP_WriteToFifo()
   {
     // Copy the entire head struct .. "uninterrupted" by the writer if we're lucky:
     *pHead = *(volatile T_CWDSP_FifoHead *)&pFifo->head;
        //     |___________________________|-- hint for an overly smart C compiler
        //                                     to READ THIS AGAIN in each loop .
     ++nAttempts;

     // Check if both copies of the FIFO head-index are equal in our "snapshot".
     // CwDSP_WriteToFifo() stores 'index2' first, then the TIMESTAMP, then 'index'.
     // If both indices are equal, and if the overly smart C compiler didn't modify
     // the sequence of instructions when WRITING to the FIFO, chances are good
     // that the captured timestamp applies to the captured head-index .
     // Quite naive and not bullet-proof but better than nothing ..
   }
  while( (pHead->index != pHead->index2) && (nAttempts<10) );

} // end CwDSP_ReadFifoHeadIndexAndTimestamp()

//---------------------------------------------------------------------------
double CwDSP_GetTimestampForFifoEntry( T_CwDSP_Fifo *pFifo, int iTailIndex )
  // Calculates the timestamp (in seconds) for the FIFO entry at iTailIndex,
  // by stepping BACK from the timestamp stored for the head index.
  // [in] pFifo      : FIFO with head index, head timestamp, and sampling interval
  // [in] iTailIndex : index of the entry for which the timestamp is wanted
  // Return value    : head timestamp minus (distance to head * seconds per sample)
{
  T_CWDSP_FifoHead sSnapshot;
  int nSamplesBehindHead;

  CwDSP_ReadFifoHeadIndexAndTimestamp( pFifo, &sSnapshot );
  nSamplesBehindHead = sSnapshot.index - iTailIndex;
  if( nSamplesBehindHead < 0 ) // head already wrapped around, tail not yet ..
   { nSamplesBehindHead += CWDSP_PROCESS_FIFO_NSAMPLES;
   }
  return sSnapshot.dblTimestamp_s - pFifo->dblSecondsPerSample * nSamplesBehindHead;
} // end CwDSP_GetTimestampForFifoEntry()

//---------------------------------------------------------------------------
void CwDSP_ReadFromFifo( T_CwDSP_Fifo *pFifo, int *piTail, float *pfltDestBuffer, int nSamples,
           double *pdblTimestamp_s ) // [out,optional] timestamp of the 1st sample in pfltDestBuffer,
                                     //       comparable with DSW_ReadHighResTimestamp_s() .
  // Copies nSamples from the circular FIFO into a simple, linear array
  // and advances the tail index accordingly.
  // [in]     pFifo          : source FIFO
  // [in,out] piTail         : the reader's tail index; NULL = use the FIFO's
  //                           primary tail index (pFifo->iTail)
  // [out]    pfltDestBuffer : receives nSamples samples
  // [in]     nSamples       : number of samples to copy. NOT checked against
  //                           the number of samples available in the FIFO !
  // Note: The caller has already checked the number of samples available in the source FIFO
  //       by calling CwDSP_GetNumSamplesInFifoForTailIndex() .
{
  T_CWDSP_FifoHead sSnapshot;
  int iRead, nBehindHead, nCopied;

  CwDSP_ReadFifoHeadIndexAndTimestamp( pFifo, &sSnapshot );
  if( piTail == NULL ) // no 'private' tail index -> use the primary one
   {  piTail = &pFifo->iTail;
   }
  iRead = *piTail & (CWDSP_PROCESS_FIFO_NSAMPLES-1);

  if( pdblTimestamp_s != NULL )  // caller wants the timestamp of the first sample, so CALCULATE it:
   { // The head's timestamp is a HIGHER VALUE (in seconds) than the one
     // of the older sample at the tail index, thus the subtraction below.
     nBehindHead = sSnapshot.index - iRead;
     if( nBehindHead < 0 ) // circular buffer wrap..
      { nBehindHead += CWDSP_PROCESS_FIFO_NSAMPLES;
      }
     *pdblTimestamp_s = sSnapshot.dblTimestamp_s - pFifo->dblSecondsPerSample * nBehindHead;
   }

  for( nCopied=0; nCopied<nSamples; ++nCopied )
   { *pfltDestBuffer++ = pFifo->fltQueue[ iRead ];
     if( ++iRead >= CWDSP_PROCESS_FIFO_NSAMPLES )
      {  iRead = 0; // circular index wrap; <-- good place for a breakpoint ..
      }
   }

  *piTail = iRead; // hand the advanced TAIL index back to its owner
} // end CwDSP_ReadFromFifo()


//---------------------------------------------------------------------------
void CwDSP_WriteToFifo( T_CwDSP_Fifo *pFifo, float *pfltSourceBuffer, int nSamples,
                        double dblTimestamp_s ) // [in] accurate timestamp for the first sample in the new block
  // Appends nSamples from a linear array to the circular FIFO, then publishes
  // the new head index together with the timestamp belonging to it.
  // [in,out] pFifo            : destination FIFO
  // [in]     pfltSourceBuffer : nSamples samples to append
  // [in]     nSamples         : number of samples. NOT checked against the
  //                             free space in the FIFO !
  // Note: The caller has already checked the remaining capacity in the destination FIFO,
  //       or simply ignores if the READER (that drains the FIFO) can keep up the pace.
{
  int iWrite = pFifo->head.index & (CWDSP_PROCESS_FIFO_NSAMPLES-1);
  int nRemaining;

  for( nRemaining=nSamples; nRemaining>0; --nRemaining )
   { pFifo->fltQueue[ iWrite ] = *pfltSourceBuffer++;
     if( ++iWrite >= CWDSP_PROCESS_FIFO_NSAMPLES )
      {  iWrite = 0; // circular index wrap; <-- good place for a breakpoint ..
      }
   }

  // Publish the new head in THIS sequence: index2, timestamp, index .
  // The non-atomic update of head index and timestamp is a kludge:
  // If a reader in another thread really "interrupts" this function between
  // the stores below, CwDSP_ReadFifoHeadIndexAndTimestamp() may notice that
  // by comparing head.index2 against head.index in its captured copy.
  pFifo->head.index2 = iWrite;
  pFifo->head.dblTimestamp_s = dblTimestamp_s + (double)nSamples * pFifo->dblSecondsPerSample;
  pFifo->head.index = iWrite;
  // From now on, the new samples are available for other threads,
  // for example when THE KEYER THREAD reads another bunch
  // in CwKeyer_CollectDataForTimingScope(), every TWO MILLISECONDS .

} // end CwDSP_WriteToFifo()

//---------------------------------------------------------------------------
void CwDSP_ProcessSampleFromReceivedAudioStream( T_CwDSP *pCwDSP,
       float fltSample, // [in] single audio sample received with f_sample = 8 kHz
                        // from the NETWORK THREAD (CwNet.c), already normalized to -1 .. +1 .
      double dblTimestamp_s) // [in] precise timestamp, comparable with DSW_ReadHighResTimestamp_s()
  // Appends a single sample to pCwDSP->sNetworkRxFifo . Unless the output is
  // currently generating the sidetone or already playing network audio,
  // the output state is switched to 'network audio' first.
  // Does nothing if pCwDSP is NULL.
{
  if( pCwDSP == NULL )
   { return;
   }

  if( ! (  (pCwDSP->iOutputState == DSP_OUTPUT_STATE_GENERATE_SIDETONE )
         ||(pCwDSP->iOutputState == DSP_OUTPUT_STATE_NETWORK_AUDIO ) ) )
   { // neither sidetone nor network audio at the moment -> switch to OUTPUT NETWORK AUDIO :
     CwDSP_SwitchOutputState( pCwDSP, DSP_OUTPUT_STATE_NETWORK_AUDIO );
     // ,-----------------------------'
     // '--> Controls the 'routing' of audio in DspThread() .
   }

  // Queue up the sample. Depending on pCwDSP->iOutputState, the DSP worker
  // thread will either pass this received audio stream on to the RADIO (voice-TX?)
  // or to the CLIENT OPERATOR's loudspeaker, headphone, or whatever .
  // Of course that doesn't happen sample-by-sample but block-by-block.
  CwDSP_WriteToFifo( &pCwDSP->sNetworkRxFifo, &fltSample, 1/*nSamples*/, dblTimestamp_s );
} // end CwDSP_ProcessSampleFromReceivedAudioStream()




//---------------------------------------------------------------------------
DWORD WINAPI DspThread( LPVOID lpParam )
  // 'DSP thread' : Reads input from the audio device (if configured),
  //                sends output to another audio device (if configured),
  //                processes audio from / for streaming via network,
  //                and who-knows-what ...
  // [in] lpParam : pointer to the T_CwDSP instance served by this thread.
  // Return value : thread exit code (always 1 here).
  // The thread loop runs as long as pCwDSP->iThreadStatus remains
  // DSP_THREAD_STATUS_RUNNING and pCwDSP->dwInitMagic remains DSP_INIT_MAGIC.
  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  // Audio signal flow and objects involved in DspThread():
  //     Input from "Direct Sound" / local RX or client's microphone (with f_sample = 48 kHz)
  //     |
  //    \|/
  //    Decimators[]                          sNetworkRxFifo (f_sample = 8 kHz)
  //     |                                              |
  //     |---------<---("without local AUDIO-INPUT")----'
  //    \|/
  //   sInputFifo   (with f_sample = 8 kHz)
  //     |
  //    \|/
  //   Optional audio filter
  //     |
  //    \|/
  //   sOutputFifo  (with f_Sample = 8 kHz)
  //     |
  //    \|/
  //   Interpolators[]
  //     |
  //    \|/
  //   Output to "Direct Sound" (with f_sample = 48 kHz)
  //
  // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
{
  T_CwDSP *pCwDSP = (T_CwDSP*)lpParam;
  // T_CwNet *pCwNet = CwKeyer_Config.pCwNet; // optional instance for audio "via CW network"

  T_TIM_Stopwatch sw_BeginOfThreadLoop, sw_SpeedTest, sw_SpeedTest4Spectrum;
  int  iFifoHead, iFifoTail;
  int  iThreadExitCode = 1;
  long nBytesFilled, nBytesEmpty;
  int  i, nSamplesIn, nSamplesOut, nDecimatedSamples/*at 8 kS/s*/;
  int  nLoopsWithoutOutput = 0;
# define SAMPLES_PER_AUDIO_IO_BUFFER 2048 // <- must be large enough for ~~20 ms
      // (worst-case-estimate for the thread loop time)
      //   20 ms * 48 kHz = 960 samples ~~ 1024 (or better 2048 for more headroom)
  short i16AudioInOutBuffer[SAMPLES_PER_AUDIO_IO_BUFFER];
  float fltAudioInputBuffer[SAMPLES_PER_AUDIO_IO_BUFFER];
  float fltAudioOutputBuffer[SAMPLES_PER_AUDIO_IO_BUFFER];
  float fltFactor, fltSample;
  BOOL  ok;
  T_CwDSP_Fifo *pSourceFifo;
  double dblTimestamp_s, dblOldestTimestamp_s;


  pCwDSP->iThreadStatus = DSP_THREAD_STATUS_RUNNING;
  pCwDSP->dwThreadLoops = pCwDSP->dwThreadErrors = 0;


  TIM_StartStopwatch( &sw_BeginOfThreadLoop ); // don't leave this un-initialized..

  // Almost endless DSP thread loop begins HERE.............................
  while( (pCwDSP->iThreadStatus == DSP_THREAD_STATUS_RUNNING)
      && (pCwDSP->dwInitMagic == DSP_INIT_MAGIC) ) // bail out when the magic smoke escaped
   {
     HERE_I_AM__DSP();  // -> DSP_iLastSourceLine (when compiled for 'hardcore debugging')
     TIM_StartStopwatch( &pCwDSP->sw_ThreadWatchdog ); // <- allow the GUI to check for "crashed threads" :
         // (If iThreadStatus == DSP_THREAD_STATUS_RUNNING
         //  but TIM_ReadStopwatch( &pCwDSP->sw_ThreadWatchdog ) > 500,
         //  the GUI may report problems in the error history)
     ++pCwDSP->dwThreadLoops;
     pCwDSP->dw8ThreadIntervals_us[pCwDSP->dwThreadLoops&7]
       = TIM_ReadAndRestartStopwatch_us( &sw_BeginOfThreadLoop );

     Sleep(5); // <- Not very reliable because windows may let this thread "sleep"
     // for MUCH LONGER than 5 milliseconds (often seen 6 ms, but expect more) .
     HERE_I_AM__DSP(); // didn't arrive HERE ? Most likely, crash in ANOTHER thread.

     // -------------- AUDIO INPUT (from selected audio device) -------------
     // Destination:  pCwDSP->sInputFifo
#   if( SWI_USE_DSOUND ) // use Windows "DirectSound" / dsound_wrapper.c ?
     if( (pCwDSP->pDSW != NULL ) && (DSW_IsInputOpened( pCwDSP->pDSW ) ) ) // Only if the AUDIO INPUT DEVICE is open, collect samples from it:
      { HERE_I_AM__DSP();
        TIM_StartStopwatch( &sw_SpeedTest );
#      if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
        CwDSP_EnterCriticalSection_AudioIO( pCwDSP ); // prevent other threads...
        // from interfering between DSW_QueryInputFilled() and DSW_ReadBlock().
        // EVERY path below must leave this critical section exactly ONCE.
#      endif
        HERE_I_AM__DSP();
        if( DSW_QueryInputFilled( pCwDSP->pDSW, &nBytesFilled ) ) // new AUDIO INPUT available ?
         { // Note: From Mr. DirectSound, bytes arrived in funny block sizes
           //       of e.g. nBytesFilled = 3000 bytes, sometimes even 4024 bytes,
           //       so don't assume they are nice powers of two !
           HERE_I_AM__DSP();
           nSamplesIn = nBytesFilled / sizeof(short); // -> 16 ... SAMPLES_PER_AUDIO_IO_BUFFER
           if( nSamplesIn > SAMPLES_PER_AUDIO_IO_BUFFER ) // respect the capacity of i16AudioInOutBuffer[SAMPLES_PER_AUDIO_IO_BUFFER]
            {  nSamplesIn = SAMPLES_PER_AUDIO_IO_BUFFER;
            }
           nBytesFilled = nSamplesIn * sizeof(short); // back from NUMBER OF SAMPLE POINTS to a 'number of bytes' ... sigh
           if( nSamplesIn < 16 ) // don't waste time reading less than 16 samples from the input
            {
#            if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
              CwDSP_LeaveCriticalSection_AudioIO( pCwDSP ); // immediately allow others to call Mr. DirectSound again
#            endif
            }
           else // got ENOUGH samples for processing (don't waste time processing one or two samples..) ->
            { HERE_I_AM__DSP();  // <- this confirmed crashing in DSW_ReadBlock() with the '0xFEEEFEEE' exception !
              ok = DSW_ReadBlock( pCwDSP->pDSW, (char*)i16AudioInOutBuffer, nBytesFilled/*! .. not nSamplesIn !*/,
                                  &dblOldestTimestamp_s );
              HERE_I_AM__DSP(); // <- survived another call of DSW_ReadBlock() ...
#            if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
              CwDSP_LeaveCriticalSection_AudioIO( pCwDSP ); // after DSW_ReadBlock(), allow others to call Mr. DirectSound again
              HERE_I_AM__DSP(); // <- survived leaving the critter supposed to protect Mr. DirectSound ...
#            endif
              if( ok )
               { // DirectSound (input) delivered i16AudioInOutBuffer[0..nSamplesIn-1] .
                 // Typically seen here: nSamplesIn = 476 .   476 / 48kHz = 9.9 ms,
                 //  obviously caused by the "10 millisecond DirectSound I/O update rate".
                 CwDSP_UpdateSpeedTestResult( pCwDSP, DSP_SPEEDTEST_READ_AUDIO_INPUT, TIM_ReadStopwatch_us( &sw_SpeedTest) );
                 // ,---------------------------------|____________________________|
                 // '--> here: time spent in DSW_QueryInputFilled() plus DSW_ReadBlock().
                 TIM_StartStopwatch( &sw_SpeedTest ); // now measure the time spent for converting/downsampling the INPUT
                 // Convert the 16-bit signed integers into DSP-friendly floating point samples.
                 // So we don't have to care about integer overflow (or clipping)
                 // until converting the processed result back into integer,
                 // with e.g. automatic gain control, or whatever :
                 fltFactor = CwDSP_DecibelToVoltageGainFactor( pCwDSP->cfg.iAudioInGain_dB ) / 32767.0;
                 CwDSP_ShortToFloat( i16AudioInOutBuffer, fltAudioInputBuffer, nSamplesIn, fltFactor );
                 //  '--> not only converts the data type, but also SCALES
                 //       (normalizes) the sample value ranges to -1.0 ... +1.0 .
                 // Next processing step: DOWNSAMPLE to 8 kSamples/second ..
                 nDecimatedSamples = SRConv_TwoStageDecimator( pCwDSP->Decimators, fltAudioInputBuffer,
                                        nSamplesIn, fltAudioOutputBuffer );
                 // The DECIMATOR may cause a latency of a few samples,
                 //     which 'theoretically' should be added to dblOldestTimestamp_s here.
                 //     But the uncertainty of how many audio samples are still waiting
                 //     'in the pipe' between analog input and DSW_QueryInputFilled()
                 //     is much higher than that (in the order of tens of milliseconds).
                 // We're at fsample = 8 kHz here now .
                 //       From nSamples = 476, nDecimatedSamples = 80 poured out
                 //       of the decimator (decimating by SIX in this case).
                 //                       476 / 6 = 79.3333 -> ok .
                 // Store the decimated result in the DSP's CIRCULAR INPUT BUFFER:
                 CwDSP_WriteToFifo( &pCwDSP->sInputFifo, fltAudioOutputBuffer, nDecimatedSamples, dblOldestTimestamp_s );
                 CwDSP_UpdateSpeedTestResult( pCwDSP, DSP_SPEEDTEST_DOWNSAMLE_INPUT, TIM_ReadStopwatch_us( &sw_SpeedTest) );
               }
            } // end else < enough samples for processing >
         } // end if < DSW_QueryInputFilled() successful ? >
        else // DSW_QueryInputFilled() failed -> nothing to read in this loop, but ...
         {
#         if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
           // ... the critical section entered above must be left on THIS path, too.
           // Without this, it stayed 'entered' after a failed query
           // (presumably locking out other threads from audio I/O - see CwDSP_EnterCriticalSection_AudioIO).
           CwDSP_LeaveCriticalSection_AudioIO( pCwDSP );
#         endif
         }
        // No-No: Don't call CwDSP_LeaveCriticalSection_AudioIO() HERE
        //        (already called once on each of the three paths above).
        HERE_I_AM__DSP();
      } // end if( pCwDSP->pDSW != NULL )
     else  // here: "without local AUDIO-INPUT" as sketched in the audio signal flow (further above)
      { // This may be the 'normal case' for an RCW Keyer Instance running as CLIENT for a remote radio,
        // when exclusively used for CW (and thus no need for a microphone on the client PC) !
        // Instead of the AUDIO INPUT DEVICE ("soundcard" or a modern Icom radio's RX-audio signal),
        // the CW-DSP may be used to process the NETWORK AUDIO SIGNAL instead. (future plan)
        // The RCW Keyer's "Scope Display", analog channel titled "audio input",
        // would then show the signal that ENTERS the DSP audio filter,
        // while the "audio output" shows the FILTERED signal.
      }
#   endif // SWI_USE_DSOUND ?

     // Optionally feed the decimated audio signal in sInputFifo,
     //          with f_sample = CWDSP_INPUT_FIFO_SAMPLING_RATE,
     //  into the "Audio CW Decoder" (demodulator part) ?
     if( pCwDSP->cfg.iAudioFlags & DSP_AUDIO_FLAGS_DECODE_CW )
      { // Start or restart the "Audio CW Demodulator / Decoder" ?
        if( pCwDSP->AudioCwDecoder[0].iCwDecoderState == CWDSP_DECODER_STATE_OFF )
         { CwDSP_StartAudioCwDecoder( pCwDSP );
         }
        // What's the "source" for the Audio CW Decoder ?
        pSourceFifo = &pCwDSP->sInputFifo;  // default "source" for the CW audio decoder
        if( pCwDSP->iOutputState == DSP_OUTPUT_STATE_NETWORK_AUDIO ) // audio output currently "from the network" (e.g. remote server) ?
         { pSourceFifo = &pCwDSP->sNetworkRxFifo; // <- fortunately, CWDSP_NETWORK_FIFO_SAMPLING_RATE == CWDSP_INPUT_FIFO_SAMPLING_RATE
         }
        iFifoTail = pCwDSP->iCwDecoderSourceFifoTail; // use a local copy for speed ..
        nSamplesIn = CwDSP_GetNumSamplesInFifoForTailIndex( pSourceFifo, iFifoTail );
        // '--> Number of samples at fs=8 kHz we MAY read from pSourceFifo for THIS particular "reader" .
        dblTimestamp_s = CwDSP_GetTimestampForFifoEntry( pSourceFifo, iFifoTail );
        // '--> precise timestamp for the first sample processed in the loop below
        for( i=0; i<nSamplesIn; ++i )
         { fltSample = pSourceFifo->fltQueue[iFifoTail];
           iFifoTail = (iFifoTail+1) & (CWDSP_PROCESS_FIFO_NSAMPLES-1); // circular index wrap ..
           //
           // Feed the sample also into the OVERLAPPING BUFFER for the short-time FFTs.
           // Principle: fltSample -------,
           //                             |
           //                 iFFTInputQueueHeadIndex++
           //                             |
           //                     |..... \|/ ...>
           //                    ,----------------,  Note: For each FFT,
           // fltFFTInputQueue[] |<--FFT_LENGTH-->|   only HALF of this buffer
           //                    '----------------'   is "thrown away" !
           //                             '---->,--------------------------,
           //                                   | FFT_MultiplyWindow_Real()|
           //                             ,---->|__________________________|
           //                    ,----------------,     ||
           //   fltFFTWindow[]   |    __-----__   |     ||
           //                    |_---         --_|    _||_
           //                    '----------------'    \  /
           //                                           \/
           //  ,------------,   /| 'Real' FFT     ,------------,
           //  | fltFFTRe[] |  / |______________  | fltFFTRe[] |
           //  '------------' /                 | '------------'
           //  ,------------, \   ______________|
           //  | fltFFTIm[] |  \ |                ( no imaginary part on INPUT )
           //  '------------'   \|
           //
           //     Note: As with many 'in-place FFT' implementations,
           //           the output (complex frequency bins)
           //           overwrite the input (windowed samples in the time domain).
           //     No big loss, because due to the 50 % overlap in the time domain,
           //     fltFFTInputQueue[] must be SHIFTED and MULTIPLIED with the
           //     window.
           //     After each FFT (again, with 50 % overlap), only HALF
           //     of the samples in fltFFTInputQueue[] are 'scrolled out',
           //     and iFFTInputQueueHeadIndex reduced by CWDSP_AUDIO_SPECTRUM_FFT_LENGTH/2 .
           //     Then, when iFFTInputQueueHeadIndex reaches CWDSP_AUDIO_SPECTRUM_FFT_LENGTH(!),
           //     again, the process sketched above repeats.
           pCwDSP->fltFFTInputQueue[ pCwDSP->iFFTInputQueueHeadIndex++ ] = fltSample;
           if( pCwDSP->iFFTInputQueueHeadIndex >= CWDSP_AUDIO_SPECTRUM_FFT_LENGTH )
            { // Time for action, as sketched above !
              TIM_StartStopwatch( &sw_SpeedTest4Spectrum );
              FFT_MultiplyWindow_Real(
                 pCwDSP->fltFFTInputQueue, // [in] float *pfltInputSamples,
                 pCwDSP->fltFFTWindow,     // [in] float *pfltWindow,
                 pCwDSP->fltFFTRe,         // [out] float *pfltDestSamples,
                 CWDSP_AUDIO_SPECTRUM_FFT_LENGTH); // [in] int iLength
              FFT_CalcRealFft( // transform REAL samples in the time domain into COMPLEX frequency bins
                 CWDSP_AUDIO_SPECTRUM_FFT_LENGTH, // number of points in the time domain (details in FFT_API.c..)
                 pCwDSP->fltFFTRe,  // [in] real input signal, [out] REAL PART of the complex frequency bins
                 pCwDSP->fltFFTIm); // [out] imaginary part of the complex frequency bins
              FFT_CopyFloatArray( // scroll out HALF of the time-domain samples .. again, see sketch above
                 pCwDSP->fltFFTInputQueue+CWDSP_AUDIO_SPECTRUM_FFT_LENGTH/2, // [in] pfltSource: SECOND half of the input queue
                 pCwDSP->fltFFTInputQueue, // [out] pfltDest, FIRST half of the input queue
                 CWDSP_AUDIO_SPECTRUM_FFT_LENGTH/2); // [in] int iLength (number of ARRAY ELEMENTS, not "sizeof(something) in BYTES")
              pCwDSP->iFFTInputQueueHeadIndex -= CWDSP_AUDIO_SPECTRUM_FFT_LENGTH/2;
              ++pCwDSP->dwFFTCounter;  // here: incremented in DspThread(), after calculating a new, overlapped AUDIO FFT
              // The NEXT, "half-overlapped" FFT will be calculated
              // when iFFTInputQueueHeadIndex reaches CWDSP_AUDIO_SPECTRUM_FFT_LENGTH again.
              CwDSP_ProcessComplexAudioSpectrumForCwDecoder( pCwDSP,
                 pCwDSP->fltFFTRe, pCwDSP->fltFFTIm, CWDSP_AUDIO_SPECTRUM_NUM_FREQUENCY_BINS,
                 dblTimestamp_s );

              CwDSP_UpdateSpeedTestResult( pCwDSP, DSP_SPEEDTEST_DECODER_SPECTRUM, TIM_ReadStopwatch_us( &sw_SpeedTest4Spectrum ) );
              // ,------------------------------|____________________________|
              // '--> here: time spent in DspThread() to calculate a new AUDIO SPECTRUM (overlapped short-time FFT)

            } // end if( pCwDSP->iFFTInputQueueHeadIndex >= CWDSP_AUDIO_SPECTRUM_FFT_LENGTH )
           dblTimestamp_s += pSourceFifo->dblSecondsPerSample; // timestamp for the sample processed in the next loop
         }   // end of < all samples fed into the "Audio CW Decoder" (demodulator) >
        pCwDSP->iCwDecoderSourceFifoTail = iFifoTail;
      }    // end if( pCwDSP->cfg.iAudioFlags & DSP_AUDIO_FLAGS_DECODE_CW )
     else // Audio CW Decoder shall be OFF ->
      { pCwDSP->AudioCwDecoder[0].iCwDecoderState = CWDSP_DECODER_STATE_OFF;
      }

     // At THIS point, the decimated pCwDSP->sInputFifo is ready for processing.
     // In many cases ("CW network" enabled), the next step is
     //    AUDIO COMPRESSION (into 8 bits per sample) and merging of that stream
     //    into the outbound TCP/IP traffic (in module CwNet.c) .
     // Only if THERE IS NO CLIENT/SERVER COMMUNICATION INVOLVED,
     //    the "processing" merely consists of copying
     //    as many samples from pCwDSP->sInputFifo into pCwDSP->sOutputFifo,
     //    e.g. to pass on the audio from the local receiver
     //         to the operator's "shack PC" or whatever.
     pSourceFifo = &pCwDSP->sInputFifo;  // default "source" for the audio output
     if( pCwDSP->iOutputState == DSP_OUTPUT_STATE_NETWORK_AUDIO ) // audio output currently "from the network" (e.g. remote server) ?
      { pSourceFifo = &pCwDSP->sNetworkRxFifo;
      }

     nSamplesIn = CwDSP_GetNumSamplesInFifo( pSourceFifo );
        // '--> number of samples that CAN be read from the circular source FIFO.
     // NOTE(review): The number of samples copied below is NOT limited by the
     //    free space in sOutputFifo. The former "take the lower of the two" branch
     //    never modified nSamplesIn; it only read the loop counter 'i' (which is
     //    still uninitialized here when the CW decoder is disabled), so it was removed.
     //    If the output FIFO isn't drained (e.g. while the sidetone occupies the
     //    audio output), its head index runs over its tail index.
     //    Whether to limit nSamplesIn to CwDSP_GetFreeSpaceInFifo( &pCwDSP->sOutputFifo )
     //    (more latency) or to keep overwriting (as now) is left to be confirmed.
     if( nSamplesIn > 0 )  // copy as many samples as possible from source- to output FIFO:
      { iFifoTail = pSourceFifo->iTail;
        iFifoHead = pCwDSP->sOutputFifo.head.index;
        for( i=0; i<nSamplesIn; ++i )
         {
#         if( 1 ) // (1)=normal compilation, (0)=test with a pure sinewave for the DECIMATED OUTPUT
           pCwDSP->sOutputFifo.fltQueue[ iFifoHead++ ] = pSourceFifo->fltQueue[ iFifoTail++ ];
#         else
           pCwDSP->sOutputFifo.fltQueue[ iFifoHead ] = SoundTab_fltCosTable[
                                 (iFifoHead * (32*SOUND_COS_TABLE_LEN/CWDSP_PROCESS_FIFO_NSAMPLES) ) % SOUND_COS_TABLE_LEN];
                // sinewave periods per FIFO___|  |____ 32768 ? ____| |_________ 2048 ___________|
           iFifoHead++;
           iFifoTail++;  // TEST: throw away the input
#         endif // normal compilation or "output interpolator test" with a clean sinewave ?
           iFifoHead &= (CWDSP_PROCESS_FIFO_NSAMPLES-1); // circular index wrap ..
           iFifoTail &= (CWDSP_PROCESS_FIFO_NSAMPLES-1);
         }
        pSourceFifo->iTail = iFifoTail;  // drained the "source" (sInputFifo or sNetworkRxFifo)
        pCwDSP->sOutputFifo.head.index2 = iFifoHead; // kludge to reduce the possibility of getting THE TIMESTAMP wrong, in CwDSP_ReadFromFifo().
        pCwDSP->sOutputFifo.head.dblTimestamp_s = CwDSP_GetTimestampForFifoEntry( pSourceFifo, pSourceFifo->iTail ); // <- may be "off by one sample".. anyway
        pCwDSP->sOutputFifo.head.index  = iFifoHead;  // we've FILLED the output FIFO..
      } // end if < copy any (decimated) samples from the source FIFO to pCwDSP->sOutputFifo ?

     // -------------- AUDIO OUTPUT (via selected audio device) -------------
     if( pCwDSP->iOutputState != DSP_OUTPUT_STATE_GENERATE_SIDETONE ) // audio output currently NOT occupied by the SIDETONE ?
      {
#      if( SWI_USE_DSOUND ) // use Windows "DirectSound" / dsound_wrapper.c ?
        if( pCwDSP->pDSW != NULL ) // ok, there is a DirectSound-wrapper ...
         { HERE_I_AM__DSP();

           // Determine the number of samples we COULD read from the OUTPUT FIFO
           // (contains DECIMATED samples, i.e. at f_sample = 8 kHz) :
           nDecimatedSamples = CwDSP_GetNumSamplesInFifo( &pCwDSP->sOutputFifo );
           nSamplesIn = nDecimatedSamples * 6; // we're upsampling by SIX..
           // nSamplesIn is the number of samples we COULD 'interpolate' (upsample)
           // to f_sample = 48 kHz at the moment, if Mr. DirectSound-OUT permits.
           // But avoid to interpolate and write to the output device if only
           // a few samples can be written at all:
           if( nSamplesIn >= 16 ) // "enough samples available to write to the output" ?
            { // How many samples can Mr. DirectSound's OUTPUT consume at the moment ?
#            if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
              CwDSP_EnterCriticalSection_AudioIO( pCwDSP ); // prevent other threads
              // from interfering between DSW_QueryOutputSpace() and DSW_WriteBlock().
              // Interpolating a few samples for output only takes a few microseconds,
              // so this is ok, too.
#            endif
              // Determine the number of interpolated (upsampled) samples
              // we COULD write to the audio output device at the moment:
              if( DSW_QueryOutputSpace( pCwDSP->pDSW, &nBytesEmpty ) ) // ready for AUDIO OUTPUT ?
               { // As usual from DirectSound, expect to see funny "empty sizes"
                 // like nBytesEmpty = 3776, when the thread WANTS TO WRITE .
                 nSamplesOut = nBytesEmpty / sizeof(short);
                 if( nSamplesOut >= 64 ) // also "worth the effort" now ?
                  { HERE_I_AM__DSP();
                    if( nSamplesOut > nSamplesIn )
                     {  nSamplesOut = nSamplesIn;
                     }
                    if( nSamplesOut > SAMPLES_PER_AUDIO_IO_BUFFER )
                     { // Limit this to the capacity of fltAudioOutputBuffer[] and i16AudioInOutBuffer[] :
                       nSamplesOut = SAMPLES_PER_AUDIO_IO_BUFFER; // e.g. 2048 (enough for 20 ms, if the thread loop is "slow"..)
                     }
                    nDecimatedSamples = nSamplesOut / 6;  // integer division, rounded DOWN
                    // '--> ALWAYS fits inside fltAudioInputBuffer[SAMPLES_PER_AUDIO_IO_BUFFER],
                    //      e.g. nDecimatedSamples = 1024 / 6 = 170; enough for 20 ms @ fsample=8 kHz.
                    // Copy as many DECIMATED sample as we can from the DSP's circular FIFO.
                    // SRConv_TwoStageUpsampler() needs them in a simple, non-circular ARRAY.
                    CwDSP_ReadFromFifo( &pCwDSP->sOutputFifo, &pCwDSP->sOutputFifo.iTail, fltAudioInputBuffer, nDecimatedSamples, &dblTimestamp_s );
                    nSamplesOut = SRConv_TwoStageUpsampler( pCwDSP->Interpolators,
                                             fltAudioInputBuffer/*pfltInput*/,
                                              nDecimatedSamples/*nSamplesIn*/,
                                           fltAudioOutputBuffer/*pfltOutput*/);
                    if(  (nSamplesOut > 0 )  // the "two-stage upsampler" seems to be working..
                      && (nSamplesOut <= SAMPLES_PER_AUDIO_IO_BUFFER) ) // and we won't exceed our audio I/O buffer...
                     { // .. so convert the 32-bit floating point values from our DSP
                       //    into 16-bit signed integers for Mr. DirectSound :
                       fltFactor = 32767.0 * CwDSP_DecibelToVoltageGainFactor( pCwDSP->cfg.iAudioOutGain_dB );
                       CwDSP_FloatToShort( fltAudioOutputBuffer, i16AudioInOutBuffer, nSamplesOut, fltFactor );
                       // For example, with nDecimatedSamples = 170 ( ~~ 20ms @ fsample=8 kHz),
                       //     there are nSamplesOut = 170 * 6 = 1020(!) samples to write now:
                       if( ! DSW_WriteBlock( pCwDSP->pDSW, (char *)i16AudioInOutBuffer, nSamplesOut*sizeof(short) ) ) // here: called from DspThread()
                        { // Don't STRING-COPY the error message, just copy a POINTER here:
                          pCwDSP->pszLastError = pCwDSP->pDSW->sz255LastOutputError;
                        } // end if < DSW_WriteBlock() failed >
                       else // successfully wrote samples to the AUDIO OUTPUT DEVICE ->
                        { nLoopsWithoutOutput = 0;
                        }
                     } // end if < SRConv_TwoStageUpsampler() working > ?
                  } // end if( nSamplesOut >= 64 )
               }   // end if < DSW_QueryOutputSpace() successful > ?
#            if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
              CwDSP_LeaveCriticalSection_AudioIO( pCwDSP ); // allow others to call DirectSound again
#            endif
            }    // end if < "enough samples available to write to the output" > ?
           else  // less than 16(?) samples available to write to the output ->
            { // This often happened when e.g. a network connection was unstable.
              // The DirectSound output repeated the same samples in a short loop,
              // causing an unpleasant buzzing sound. So try to "silence" the output..
              if( nLoopsWithoutOutput > 10 ) // .. after more than 10 thread loops without output
               {
#               if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
                 CwDSP_EnterCriticalSection_AudioIO( pCwDSP ); // prevent other threads from interfering..
#               endif
                 DSW_ZeroEmptySpaceInOutputBuffer( pCwDSP->pDSW );
#               if( SWI_USE_CRITICAL_SECTIONS_FOR_AUDIO_IO )
                 CwDSP_LeaveCriticalSection_AudioIO( pCwDSP ); // allow others to call DirectSound again
#               endif
               }
              else // not "silent" for long enough yet -> keep counting.
               { ++nLoopsWithoutOutput; // Stops at 11, so the counter cannot overflow
                 // during very long periods without any audio output.
               }
            }
         }      // end if( pCwDSP->pDSW != NULL )
#      endif   // SWI_USE_DSOUND ?
      }       // end if( pCwDSP->iOutputState != DSP_OUTPUT_STATE_GENERATE_SIDETONE )

   } // end while < DSP thread loop >

  // If the program ever gets here, the thread has "politely" terminated itself,
  //                    or someone has pulled the emergency brake .
  pCwDSP->iThreadStatus = DSP_THREAD_STATUS_TERMINATED;

  ExitThread( iThreadExitCode ); // exit code for this thread
  return iThreadExitCode; // will this ever be reached after "ExitThread" ?
} // end DspThread()
//---------------------------------------------------------------------------

//---------------------------------------------------------------------------
void CwDSP_UpdateSpeedTestResult( T_CwDSP *pCwDSP, // [in,out] DSP instance holding the speed-test statistics (development aid)
             int iTestItem,     // [in] index of the measured item, e.g. KEYER_SPEEDTEST_POLL_KEYBOARD
             int nMicroseconds ) // [in] duration of the latest measurement in microseconds
  // Feeds one new measurement into the per-item statistics:
  //   - i32SpeedTestSums_us[]   : running sum of all measurements,
  //   - i32SpeedTestCounts_us[] : number of measurements,
  //   - iSpeedTestPeaks_us[]    : largest measurement seen so far.
  // iTestItem is used as an array index without any range check,
  //   so the caller must pass a valid item number.
  // Sibling implementations of _UpdateSpeedTestResult() live in
  //   AuxComPorts.c, CwText.c, and KeyerThread.c (the latter can be
  //   instantiated for an ARRAY of 'worker threads' using serial ports).
{
  // Sum and count, used later to calculate the average:
  pCwDSP->i32SpeedTestSums_us[iTestItem]   += nMicroseconds;
  pCwDSP->i32SpeedTestCounts_us[iTestItem] += 1;

  // Peak detection. A new "peak- and average detection" begins when clicking
  // "Report Test Results (on the 'debug' tab)" in the GUI
  //  -> CwDSP_ResetSpeedTestResults() .
  if( pCwDSP->iSpeedTestPeaks_us[iTestItem] <= nMicroseconds )
   { pCwDSP->iSpeedTestPeaks_us[iTestItem] = nMicroseconds;
   }

} // end CwDSP_UpdateSpeedTestResult()

//---------------------------------------------------------------------------
int  CwDSP_GetSpeedTestAverage_us( T_CwDSP *pCwDSP,
              int iTestItem ) // [in] e.g. KEYER_SPEEDTEST_POLL_KEYBOARD
  // Returns the average of all measurements collected for the given item
  // (sum divided by count, in microseconds), or ZERO when no measurement
  // has been counted for that item yet.
{
  long nMeasurements = pCwDSP->i32SpeedTestCounts_us[ iTestItem ];

  if( nMeasurements <= 0 ) // nothing counted for this item -> avoid dividing by zero
   { return 0;
   }
  return (int)( pCwDSP->i32SpeedTestSums_us[iTestItem] / nMeasurements );
} // end CwDSP_GetSpeedTestAverage_us()

//---------------------------------------------------------------------------
void CwDSP_ResetSpeedTestResults(T_CwDSP *pCwDSP) // begins a new peak- and average detection
  // Clears all three speed-test statistics (counts, sums, peaks) for ALL
  // test items, so the next CwDSP_UpdateSpeedTestResult() starts from scratch.
{
  memset( (void*)pCwDSP->i32SpeedTestCounts_us, 0, sizeof pCwDSP->i32SpeedTestCounts_us );
  memset( (void*)pCwDSP->i32SpeedTestSums_us,   0, sizeof pCwDSP->i32SpeedTestSums_us );
  memset( (void*)pCwDSP->iSpeedTestPeaks_us,    0, sizeof pCwDSP->iSpeedTestPeaks_us );
} // end CwDSP_ResetSpeedTestResults()




//---------------------------------------------------------------------------
// Experimental "CW decoder for audio signals".
//  (very basic, doesn't even get close to 'CW Skimmer', anyway... )
//---------------------------------------------------------------------------


//---------------------------------------------------------------------------
void CwDSP_StartAudioCwDecoder( T_CwDSP *pCwDSP )
      // [in]  pCwDSP->cfg.iSidetoneFreq_Hz      : INITIAL signal frequency
      // [in]  pCwDSP->cfg.iCwDecoderDotTime_ms  : INITIAL CW timing
      // [out] pCwDSP->AudioCwDecoder[0..CWDSP_NUM_AUDIO_CW_DECODERS-1] :
      //         frequency bin range, state, centre frequency, power levels,
      //         and the embedded Morse decoder are (re-)initialized.
{
  const int nBinsPerDecoder = CWDSP_AUDIO_SPECTRUM_NUM_FREQUENCY_BINS / CWDSP_NUM_AUDIO_CW_DECODERS;
  int iDecoder;
  T_AudioCwDecoder *pDecoder;

  for( iDecoder=0; iDecoder<CWDSP_NUM_AUDIO_CW_DECODERS; ++iDecoder )
   { pDecoder = &pCwDSP->AudioCwDecoder[iDecoder];

     // Each decoder gets its own, equally sized slice of the audio spectrum.
     // As long as there is only ONE SINGLE DECODER, its frequency bin range
     // is 0 .. CWDSP_AUDIO_SPECTRUM_NUM_FREQUENCY_BINS-1 .
     pDecoder->iFirstFrequencyBin = iDecoder * nBinsPerDecoder;
     pDecoder->iLastFrequencyBin  = pDecoder->iFirstFrequencyBin + nBinsPerDecoder - 1;

     // Start squelched, centered on the configured sidetone frequency,
     // with no signal- and no noise power measured yet:
     pDecoder->iCwDecoderState = CWDSP_DECODER_STATE_SQUELCHED;
     pDecoder->fltCwDecoderCenterFrequency = pCwDSP->cfg.iSidetoneFreq_Hz;
     pDecoder->fltCwDecoderSignalPower = 0.0f;
     pDecoder->fltCwDecoderNoisePower  = 0.0f;

     // The Morse decoder itself begins with the configured dot time:
     StraightKeyDecoder_Init( &pDecoder->MorseDecoder, pCwDSP->cfg.iCwDecoderDotTime_ms );
   }

} // CwDSP_StartAudioCwDecoder()

//---------------------------------------------------------------------------
void CwDSP_SetDotTimeForAudioCWDecoder( T_CwDSP *pCwDSP, int iNewDotTime_ms ) // API..
  // Applies a new dot time (in milliseconds) to the Morse decoder
  // of EVERY audio CW decoder in this DSP instance.
{
  T_AudioCwDecoder *pDecoder = pCwDSP->AudioCwDecoder;
  int nRemaining;

  for( nRemaining=CWDSP_NUM_AUDIO_CW_DECODERS; nRemaining>0; --nRemaining )
   { pDecoder->MorseDecoder.iDotTime_ms = iNewDotTime_ms;
     ++pDecoder;
   }
} // end CwDSP_SetDotTimeForAudioCWDecoder()


//---------------------------------------------------------------------------
static void CwDSP_ProcessComplexAudioSpectrumForCwDecoder( // Called from DspThread() immediately after finishing a new FFT
        T_CwDSP *pCwDSP,    // [in,out] CW-DSP-instance :
                            // [out] pCwDSP->CwDec_ComplexAudioFrequencyBins[CWDSP_AUDIO_SPECTRUM_NUM_FREQUENCY_BINS],
                            // [out] pCwDSP->fltAudioPowerSpectrum[CWDSP_AUDIO_SPECTRUM_NUM_FREQUENCY_BINS]
        float   *fltFFTRe,  // [in] real part of the complex frequency bins
        float   *fltFFTIm,  // [in] imaginary part of the complex frequency bins
        int nFrequencyBins, // [in] number of bins in fltFFTRe[] / fltFFTIm[] ;
                            //      limited to CWDSP_AUDIO_SPECTRUM_NUM_FREQUENCY_BINS here
        double dblTimestamp_s) // [in] latency-compensated timestamp of the FFT window center
               // in seconds. Comparable with DSW_ReadHighResTimestamp_s(),
               // to align different channels when PLOTTING in the "timing scope".
               // A 'good' timestamp wasn't easy to achieve because the Windows
               // audio API (e.g. "Direct Sound") doesn't provide timestamps at all.
               // - see 'source' of the AUDIO SAMPLE TIMESTAMPS
               //   in C:\cbproj\Remote_CW_Keyer\dsound_wrapper.c .
  // What happens here:
  //   1. The complex spectrum is copied into the DSP instance, and the
  //      (linear, not logarithmized) power spectrum is calculated from it.
  //   2. Each audio CW decoder processes the new power spectrum.
  //   3. pCwDSP->i32AudioSpectrumUpdateCounter is incremented.
  // Call Stack: DspThread() [after calculating short-time FFT, with 50 % window overlap]
  //              -> CwDSP_ProcessComplexAudioSpectrumForCwDecoder()
  //               -> CwDSP_ProcessAudioSpectrumForDecoder() [possibly for MULTIPLE decoders]
  //
{ int   iBin, iDecoder, nBins;
  float fltReal, fltImag, fltPowerScale;

  // Scaling factor to make the power independent of the FFT size, resulting
  // in a value close to 1.0 for a "full-swing sinewave" (0 dBfs):
  fltPowerScale = 0.5f / (float)CWDSP_AUDIO_SPECTRUM_FFT_LENGTH;

  // Never process more bins than the destination arrays are dimensioned for:
  nBins = nFrequencyBins;
  if( nBins > CWDSP_AUDIO_SPECTRUM_NUM_FREQUENCY_BINS )
   {  nBins = CWDSP_AUDIO_SPECTRUM_NUM_FREQUENCY_BINS;
   }

  for( iBin=0; iBin<nBins; ++iBin )
   { fltReal = fltFFTRe[iBin];
     fltImag = fltFFTIm[iBin];
     pCwDSP->CwDec_ComplexAudioFrequencyBins[iBin].re = fltReal;
     pCwDSP->CwDec_ComplexAudioFrequencyBins[iBin].im = fltImag;
     // Real and imaginary part are proportional to a VOLTAGE, so the sum of
     // their squares is proportional to a POWER or ENERGY (=power * time):
     pCwDSP->fltAudioPowerSpectrum[iBin] = (fltReal*fltReal + fltImag*fltImag) * fltPowerScale;
   } // end for < copy the complex spectrum, and calculate the power spectrum >

  // There's only ONE audio spectrum for ALL decoders, so let each of them have a look:
  iDecoder = 0;
  while( iDecoder < CWDSP_NUM_AUDIO_CW_DECODERS )
   { CwDSP_ProcessAudioSpectrumForDecoder( pCwDSP, iDecoder, dblTimestamp_s );
     ++iDecoder;
   }

  ++pCwDSP->i32AudioSpectrumUpdateCounter;

} // end CwDSP_ProcessComplexAudioSpectrumForCwDecoder()

//---------------------------------------------------------------------------
void CwDSP_ProcessAudioSpectrumForDecoder(
        T_CwDSP *pCwDSP,   // [in] pCwDSP->fltAudioPowerSpectrum[],
                           // [in] pCwDSP->CwDec_ComplexAudioFrequencyBins[]
                           //      (there's only ONE audio spectrum for ALL decoders)
        int iDecoderIndex, // [in,out] pCwDSP->AudioCwDecoder[iDecoderIndex] .
        double dblTimestamp_s) // [in] latency-adjusted timestamp of the FFT window center in seconds, comparable with DSW_ReadHighResTimestamp_s(), to align channels when PLOTTING in the "timing scope"
  // Processes one new short-time power spectrum for ONE audio CW decoder :
  //   1. Finds the strongest frequency bin within the decoder's bin range
  //      and stores its frequency as the decoder's center frequency.
  //   2. Appends the peak power to the decoder's circular power history,
  //      takes the minimum / maximum of that history as noise- / signal power,
  //      and scales the current peak power into a keying signal from them.
  //   3. Writes the keying signal to the decoder's keying signal FIFO,
  //      feeds the Morse decoder ( keying signal > 0.2 means "key down" ),
  //      and forwards decoded patterns to the pattern FIFO.
  //   4. For decoder #0 only: appends a sample to the "plotter FIFO".
{
  T_AudioCwDecoder *pDecoder = &pCwDSP->AudioCwDecoder[iDecoderIndex];
  int i, peak_bin, iDecodedPattern, iHistoryIndex;
  long  nMicrosecondsSinceLastCall;
  float normalized_power, peak_power;
  float fltSortedHistory[CWDSP_DECODER_POWER_HISTORY_LENGTH];
  BOOL  fKeyDown;

  // Time between two calls, required by the Morse decoder's timing analysis:
  if( pCwDSP->fltAudioSpectrumFrameRate > 0.0f )
   { nMicrosecondsSinceLastCall = (long)(1e6 / pCwDSP->fltAudioSpectrumFrameRate);
   }
  else // frame rate not known (yet) -> use the nominal value :
   { nMicrosecondsSinceLastCall = (1000000L * CWDSP_AUDIO_SPECTRUM_FFT_LENGTH ) / ( CWDSP_INPUT_FIFO_SAMPLING_RATE * 2L);
     // e.g. 1000000 * 512 samples / (8000 samples/second  * 2 for the 50% overlap) = 32000 us
   }

  // Search the strongest bin within this decoder's frequency range:
  peak_bin   = 0;  // no real "peak" found yet ...
  peak_power = 0.0f;
  for(i=pDecoder->iFirstFrequencyBin; i<=pDecoder->iLastFrequencyBin; ++i )
   { normalized_power = pCwDSP->fltAudioPowerSpectrum[i]; // <- "normalized" to 1.0 for the peak of a full-swing sine wave (not logarithmized)
     if( normalized_power > peak_power )
      { peak_power = normalized_power;
        peak_bin = i;  // will check if this is a "significant" peak further below !
      }
   } // end for < loop to find the strongest frequency bin for this decoder >
  pDecoder->fltCwDecoderCenterFrequency = (float)peak_bin * pCwDSP->fltAudioSpectrumBinWidth_Hz/* e.g. 62.5 Hz*/;


  // Next: Update pDecoder->fltCwDecoderNoisePower, fltCwDecoderSignalPower,
  //                   and  fltCwDecoderKeyingSignal .
  pDecoder->fltCwDecoderSignalPower = peak_power; // <- displayed as the "ball riding the waves" of the audio spectrum.
       // (provisional value, replaced by the maximum of the power history further below)
  // Finding the "noise level" is not as trivial as one may guess. Due to the receiver's
  // narrow-band filter, which may be as narrow as a single frequency bin,
  // DON'T TAKE THE LEVEL IN THE STOPBAND FREQUENCY BINS as a measure for the 'noise level' !
  //  (strictly: We don't know which frequency bins are in the filter's stopband at all).
  // Instead, use this decoder's power-history (peak power of the last
  // CWDSP_DECODER_POWER_HISTORY_LENGTH spectra) :
  //
  // Append the new peak power to the CIRCULAR history buffer.
  // The index is advanced exactly ONCE per call, and wrapped by the modulo.
  // (An earlier version incremented the index a second time after the modulo.
  //  That skipped every other history entry, and let the index reach
  //  CWDSP_DECODER_POWER_HISTORY_LENGTH, so the next call wrote one element
  //  past the end of fltPowerHistory[] .
  //  NOTE(review): the removed second increment looked like a leftover of a
  //  "number of valid entries" counter. If such a counter is wanted,
  //  it needs its own struct member. )
  iHistoryIndex = pDecoder->iPowerHistoryIndex;
  if( (iHistoryIndex < 0) || (iHistoryIndex >= CWDSP_DECODER_POWER_HISTORY_LENGTH) )
   { iHistoryIndex = 0; // safety first: never write outside the history buffer
   }
  pDecoder->fltPowerHistory[iHistoryIndex] = peak_power;
  pDecoder->iPowerHistoryIndex = (iHistoryIndex + 1) % CWDSP_DECODER_POWER_HISTORY_LENGTH;

  // Sort a COPY of the history (the original must remain in chronological order):
  for(i=0; i<CWDSP_DECODER_POWER_HISTORY_LENGTH; ++i)
   {
#   if(1) // 1=normal compilation, 0=test for the QuickSort algorithm
     fltSortedHistory[i] = pDecoder->fltPowerHistory[i];
#   else
     fltSortedHistory[i] = (float)(CWDSP_DECODER_POWER_HISTORY_LENGTH-i);
     // '-->  { 150,149,148,..,3,2,1 }  BEFORE sorting,
     //       { 1,2,3,..,148,149,150 }  AFTER sorting.  (test 2025-02-23 : Ok)
#   endif
   }
  CwDSP_QuickSort_Float( fltSortedHistory, 0/*iLo*/, CWDSP_DECODER_POWER_HISTORY_LENGTH-1/*iHi*/ );
  //  '--> When fed with 'fairly broadband noise' (IC-7300 in USB), the peak frequency wandered around near 1300 Hz, and..:
  //   fltSortedHistory[  0..  9] = { 1.2e-5, 1.2e-5, 1.2e-5, 1.2e-5, .. }  but
  //   fltSortedHistory[140..149] = { 0.16,   0.17,  .. 0.21, 0.21, 0.22 } !
  //
  // With a sufficient CWDSP_DECODER_POWER_HISTORY_LENGTH, the first few
  // samples in fltSortedHistory[] represent the "noise level" (without signal),
  // the last few samples represent the "carrier level" (with signal, key down).
  // ToDo:  To make these readings less noisy, use averaging, or e.g. the first and last quartile ?
  pDecoder->fltCwDecoderNoisePower  = fltSortedHistory[0];
  pDecoder->fltCwDecoderSignalPower = fltSortedHistory[CWDSP_DECODER_POWER_HISTORY_LENGTH-1];
  if( pDecoder->fltCwDecoderSignalPower > pDecoder->fltCwDecoderNoisePower ) // avoid div-by-zero without a signal
   {  // Scale the current peak power into the range 0 (noise level) .. 1 (carrier level) :
      pDecoder->fltCwDecoderKeyingSignal = (peak_power-pDecoder->fltCwDecoderNoisePower)
                  / (pDecoder->fltCwDecoderSignalPower-pDecoder->fltCwDecoderNoisePower);
   }
  else // instead of dividing by zero, assume this:
   {   pDecoder->fltCwDecoderKeyingSignal = 0.0f;  // no signal at all
   }

#if(0) // (0)=normal compilation, (1)=TEST for plotting the CW decoder's keying signal
  pDecoder->fltCwDecoderKeyingSignal = (float)(pCwDSP->i32AudioSpectrumUpdateCounter & 15) / 15.0f;
  // '--> This exposed a weakness of the sampling of data for the 'CW timing scope'
  //      in KeyerThread() -> CwKeyer_CollectDataForTimingScope() :
  //      There was indeed a 16-step sawtooth with 16 * 8 ms = 128 milliseconds,
  //      but due to the audio-sample-processing in 'larger blocks',
  //      the rising slope was full of kinks.
#endif
  CwDSP_WriteToFifo( &pDecoder->sKeyingSignalFifo, &pDecoder->fltCwDecoderKeyingSignal, 1/*nSamples*/, dblTimestamp_s );
  fKeyDown = (pDecoder->fltCwDecoderKeyingSignal > 0.2f);
  iDecodedPattern = StraightKeyDecoder( &pDecoder->MorseDecoder, fKeyDown, nMicrosecondsSinceLastCall );
  if( ( (iDecodedPattern>0) && (iDecodedPattern<=0xFF) ) || (iDecodedPattern & CW_CHR_SPACE) )
   { // Append the 'morse code pattern' to a thread-safe FIFO,
     // from where the application can retrieve it:
     CwDSP_WriteToCwPatternDecoderFifo( pDecoder, (WORD)iDecodedPattern ); // here: RECEIVED text decoded from the short-time audio spectra
   }
  if( iDecoderIndex==0 ) // only the FIRST decoder delivers data for the plotter (timing scope)
   { CwDSP_WriteToPlotterFifo( pCwDSP, dblTimestamp_s, (iDecodedPattern>0) ? (WORD)iDecodedPattern : 0 );
   }

} // end CwDSP_ProcessAudioSpectrumForDecoder()


//----------------------------------------------------------------------------
void CwDSP_WriteToCwPatternDecoderFifo( T_AudioCwDecoder *pDecoder, WORD wCwPattern )
     // Appends one Morse code pattern to the decoder's circular pattern FIFO.
     // '--> [out] pDecoder->wCwPatternFifo[ pDecoder->iCwPatternFifoHeadIndex++ ],
     //            pDecoder->iCwPatternFifoUsage (maxes out at CWDSP_DECODER_FIFO_SIZE)
     // This "FIFO writer" doesn't care about the reader's TAIL INDEX,
     //   last not least to keep it simple, and because there may be
     //   MULTIPLE READERS (and each of them has its own tail index).
{
  int iWriteIndex        = pDecoder->iCwPatternFifoHeadIndex % CWDSP_DECODER_FIFO_SIZE; // <- safety first..
  int nEntriesAfterWrite = iWriteIndex + 1;

  pDecoder->wCwPatternFifo[ iWriteIndex ] = wCwPattern;

  // Number of elements in the FIFO (maxes out at CWDSP_DECODER_FIFO_SIZE):
  if( pDecoder->iCwPatternFifoUsage < nEntriesAfterWrite )
   { pDecoder->iCwPatternFifoUsage = nEntriesAfterWrite;
   }

  // LAST step: Writing the incremented (and wrapped) head index back to the
  // struct makes the new entry available for another thread
  // (e.g. the GUI thread, occasionally polling the head index):
  pDecoder->iCwPatternFifoHeadIndex = nEntriesAfterWrite % CWDSP_DECODER_FIFO_SIZE;
} // end CwDSP_WriteToCwPatternDecoderFifo()


//----------------------------------------------------------------------------
WORD CwDSP_ReadFromCwPatternDecoderFifo( T_AudioCwDecoder *pDecoder, int *piTailIndex )
  // Reads the next Morse code pattern for ONE reader of the decoder's pattern FIFO.
  // '--> [in] pDecoder->wCwPatternFifo[ *(piTailIndex++) ],
  //           if   pDecoder->iCwPatternFifoHeadIndex != *piTailIndex
  //           and  pDecoder->iCwPatternFifoUsage  indicates 'data available' .
  //      [in,out] *piTailIndex : this reader's own tail index; advanced
  //           (and wrapped) only when an entry was actually read.
  // Returns ZERO if nothing is currently available for THIS reader
  //   (or if one of the pointers is NULL),
  // otherwise the Morse code pattern or Morse code 'prosign' (see format in Elbug.h) .
{
  int  iReadIndex;
  WORD wResult;

  if( (pDecoder==NULL) || (piTailIndex==NULL) )
   { return 0;
   }
  iReadIndex = *piTailIndex % CWDSP_DECODER_FIFO_SIZE;
  if( pDecoder->iCwPatternFifoHeadIndex == iReadIndex ) // "FIFO empty" for THIS reader
   { return 0;
   }
  if( pDecoder->iCwPatternFifoUsage <= iReadIndex ) // this entry has never been written
   { return 0;
   }
  wResult = pDecoder->wCwPatternFifo[ iReadIndex ];
  *piTailIndex = (iReadIndex + 1) % CWDSP_DECODER_FIFO_SIZE;
  return wResult;
} // end CwDSP_ReadFromCwPatternDecoderFifo()

//----------------------------------------------------------------------------
void CwDSP_WriteToPlotterFifo( T_CwDSP *pCwDSP,
        double dblTimestamp_s, // [in] "latency-adjusted" timestamp in seconds, comparable with DSW_ReadHighResTimestamp_s(), to align channels when PLOTTING in the "timing scope"
        WORD   wCwPattern )    // [in] zero when "nothing decoded now",
                               //      otherwise the Morse keying pattern
                               //      of a single character or prosign .
   // Appends one T_CwDSP_PlotterSample to the CW decoder's circular "plotter FIFO".
   //
   // [in] (besides the above function arguments):
   //           pCwDSP->dwFFTCounter,
   //           pCwDSP->fltAudioPowerSpectrum[],
   //           pCwDSP->AudioCwDecoder[0].fltCwDecoderCenterFrequency,
   //           pCwDSP->AudioCwDecoder[0].fltCwDecoderKeyingSignal
   //
   // [out] the CW-decoder's "plotter FIFO", drained by the GUI thread
   //       when updating the timing scope via CwDSP_ReadFromPlotterFifo()
{
  int iBin;
  int iWriteIndex        = pCwDSP->iPlotterSampleFifoHeadIndex & (CWDSP_PLOTTER_SAMPLE_FIFO_SIZE-1); // <- safety first..
  int nEntriesAfterWrite = iWriteIndex + 1;
  T_CwDSP_PlotterSample *pSample       = &pCwDSP->sPlotterSampleFifo[ iWriteIndex ];
  T_AudioCwDecoder      *pFirstDecoder = &pCwDSP->AudioCwDecoder[0];

  // Assemble the new entry directly in the circular FIFO, BEFORE the head index
  // is advanced (which makes the new entry available for the reading thread):
  for( iBin=0; iBin<CWDSP_AUDIO_SPECTRUM_NUM_FREQUENCY_BINS; ++iBin )
   { pSample->fltAudioPowerSpectrum[iBin] = pCwDSP->fltAudioPowerSpectrum[iBin];
   }
  pSample->fltCwDecoderKeyingSignal    = pFirstDecoder->fltCwDecoderKeyingSignal;
  pSample->fltCwDecoderCenterFrequency = pFirstDecoder->fltCwDecoderCenterFrequency;
  pSample->wCwPattern     = wCwPattern;
  pSample->dwFFTCounter   = pCwDSP->dwFFTCounter;  // here: copied into a T_CwDSP_PlotterSample in CwDSP_WriteToPlotterFifo()
  pSample->dblTimestamp_s = dblTimestamp_s;

  // Number of elements in the FIFO (maxes out at CWDSP_PLOTTER_SAMPLE_FIFO_SIZE):
  if( pCwDSP->iPlotterSampleFifoUsage < nEntriesAfterWrite )
   { pCwDSP->iPlotterSampleFifoUsage = nEntriesAfterWrite;
   }

  // Finally "publish" the entry by storing the advanced, wrapped head index:
  pCwDSP->iPlotterSampleFifoHeadIndex = nEntriesAfterWrite & (CWDSP_PLOTTER_SAMPLE_FIFO_SIZE-1);

} // end CwDSP_WriteToPlotterFifo()

//----------------------------------------------------------------------------
T_CwDSP_PlotterSample* CwDSP_ReadFromPlotterFifo( T_CwDSP *pCwDSP, double dblTimestamp_s )
  // Returns a pointer into the audio-CW-decoder's "plotter FIFO"
  //  when an entry for the requested timestamp is available (+/- a few milliseconds),
  //  otherwise NULL (which means "the requested sample point
  //            has not arrived at the end of the signal processing chain yet",
  //            or it has already been overwritten in the circular buffer).
  // The entry is NOT removed from the FIFO, and no tail index is maintained here.
  // How the entry is located:
  //   1. Determine the newest and oldest entry that are considered safe to read.
  //   2. Assuming equally spaced samples between those two, predict the
  //      index of the entry with the wanted timestamp.
  //   3. Check ONE neighbour of the predicted entry (left or right, depending
  //      on the sign of the timestamp error) and take it if it is closer.
  // Callers: ScopeDisplay_AppendSamplesFromDSP(), .. (?) .
{
  int i, j, iNewest, iOldest, iSpan, nSamplesAvailable = pCwDSP->iPlotterSampleFifoUsage;
  T_CwDSP_PlotterSample *pNewest, *pOldest, *pBest, *pBetter;
  double deltaT_s, dblSecondsPerSample, d1, d2;
  if( nSamplesAvailable >= 2 )
   {
     // NOTE(review): the head index is read twice below. If the writer
     //   advances it between the two reads, iNewest and iOldest
     //   are derived from different head positions.
     iNewest = (pCwDSP->iPlotterSampleFifoHeadIndex - 1 + CWDSP_PLOTTER_SAMPLE_FIFO_SIZE) & (CWDSP_PLOTTER_SAMPLE_FIFO_SIZE-1);
     iOldest = (pCwDSP->iPlotterSampleFifoHeadIndex + 2 + CWDSP_PLOTTER_SAMPLE_FIFO_SIZE) & (CWDSP_PLOTTER_SAMPLE_FIFO_SIZE-1);
     if( iOldest >= nSamplesAvailable )
      {  iOldest = 0;  // no circular buffer index wrap yet -> oldest entry at index ZERO
      }
     // In this lock-free circular FIFO, the HEAD INDEX is the next to be written.
     // Because FIFO-reader and -writer run in different threads,
     //   assume the entry at the HEAD INDEX isn't safe for reading
     //   because it may be overwritten at any time.
     //   The same applies to the entry at < FifoHeadIndex + 1 > .
     //   That's why the "oldest" entry used here is at < FifoHeadIndex + 2 > .
     pOldest = &pCwDSP->sPlotterSampleFifo[ iOldest ];
     pNewest = &pCwDSP->sPlotterSampleFifo[ iNewest ];
     if( (dblTimestamp_s > pNewest->dblTimestamp_s)  // no sample with the requested timestamp in the buffer YET ..
       ||(dblTimestamp_s < pOldest->dblTimestamp_s)) // or not in the buffer ANYMORE ("too old") ?
      { return NULL;  // no data available for e.g. the "timing scope display"
      }
     deltaT_s = pNewest->dblTimestamp_s - pOldest->dblTimestamp_s;
     if( deltaT_s <= 0.0 ) // oops.. something fishy with the timestamps -> bail out, too
      { return NULL;
      }
     iSpan = ( (iNewest - iOldest) + CWDSP_PLOTTER_SAMPLE_FIFO_SIZE) & (CWDSP_PLOTTER_SAMPLE_FIFO_SIZE-1);
     // -> number of SAMPLE POINTS spanning 'delta T', unwrapped for the CIRCULAR buffer
     if( iSpan < 2 ) // too few entries between oldest and newest for a meaningful prediction
      { return NULL;
      }
     dblSecondsPerSample = deltaT_s / (double)iSpan; // AVERAGE sampling interval over the usable part of the FIFO
     // Seen here: dblSecondsPerSample = 0.016000949; etc, due to MEASURING the actual input sampling rate in dsound_wrapper.c
     // Predict the index for the wanted timestamp: offset from the oldest entry,
     // rounded to the nearest sample (the '0.5 +' before truncating to int) :
     i = iOldest + (int)(0.5 + ( dblTimestamp_s - pOldest->dblTimestamp_s ) / dblSecondsPerSample);
     i = (i + CWDSP_PLOTTER_SAMPLE_FIFO_SIZE) & (CWDSP_PLOTTER_SAMPLE_FIFO_SIZE-1); // wrap the index into the circular buffer again
     // After the masking above, i should never be negative, so effectively only
     // the second condition (entry has never been written) matters here:
     if( (i<0) || (i>=nSamplesAvailable) )
      { return NULL;
      }
     pBest = &pCwDSP->sPlotterSampleFifo[ i ]; // not necessarily "the best" (for the wanted timestamp) ...
     d1 = pBest->dblTimestamp_s - dblTimestamp_s;  // difference between the sample's REAL timestamp and the 'wanted' one..
     // With pCwDSP->dblPlotterSamplingInterval_s = 0.016, fabs(d1) should be <= 0.08 .
     //   NOTE(review): half of a 0.016 s interval would be 0.008 s - the limit above may be a typo; confirm.
     // But sometimes, due to the jitter of timestamps for the AUDIO INPUT in dsound_wrapper.c,
     // the "best" sample's left or right neighbour was even closer, and fabs(d2) was less than fabs(d1).
     if( d1 > 0.0 ) // the sample's timestamp is TOO HIGH (so it is "too new") ->
      { // a "better" sample (closer to the wanted dblTimestamp_s) may be at i-1  ...
        j = (i-1 + CWDSP_PLOTTER_SAMPLE_FIFO_SIZE) & (CWDSP_PLOTTER_SAMPLE_FIFO_SIZE-1);
        pBetter = &pCwDSP->sPlotterSampleFifo[ j ];
        d2 = pBetter->dblTimestamp_s - dblTimestamp_s;
        if( fabs(d2) < d1 ) // (d1 is positive in this branch, so d1 == fabs(d1) )
         { pBest = pBetter; // "this one is better than the predicted 'best'  !"
           // got here with: { d1=+0.0203, d2=-0.0148 };
           //                { d1=+0.0183, d2=-0.0129 };
         }
      }
     else if( d1 < 0.0 ) // the sample's timestamp is TOO LOW ("too old") ->
      { // a "better" sample (closer to the wanted dblTimestamp_s) may be at i+1  ...
        j = (i+1) & (CWDSP_PLOTTER_SAMPLE_FIFO_SIZE-1);
        pBetter = &pCwDSP->sPlotterSampleFifo[ j ];
        d2 = pBetter->dblTimestamp_s - dblTimestamp_s;
        if( fabs(d2) < -d1 ) // (d1 is negative in this branch, so -d1 == fabs(d1) )
         { pBest = pBetter;
           // got here with: { d1=-0.00647, d2=+0.00495 },
           //
         }
      }
     // ( d1 == 0.0 : exact match, nothing to improve )
     return pBest;
   }

  return NULL;  // fewer than two entries in the FIFO: "don't plot THIS SAMPLE yet, but try again later"
} // end CwDSP_ReadFromPlotterFifo()

//---------------------------------------------------------------------------
double CwDSP_GetTimestampOfOldestSampleInPlotterFifo( T_CwDSP *pCwDSP )
  // Can be used to find out WHY CwDSP_ReadFromPlotterFifo() returned NULL
  //     for a given timestamp (the sample may not be in the plotter FIFO
  //     *anymore*, or it may not be in the plotter FIFO *yet*) .
  // Returns the timestamp (in seconds) of the oldest entry regarded as
  //     readable, which is the entry two positions after the FIFO's head index,
  //     or the entry at index ZERO as long as that position has not been written.
  // Returns 0.0 if there are fewer than two entries in the FIFO.
  // Caller: ScopeDisplay_AppendSamplesFromDSP()
{
  int nEntries = pCwDSP->iPlotterSampleFifoUsage;
  int iOldestEntry;

  if( nEntries < 2 )
   { return 0.0;
   }
  iOldestEntry = (pCwDSP->iPlotterSampleFifoHeadIndex + 2 + CWDSP_PLOTTER_SAMPLE_FIFO_SIZE) & (CWDSP_PLOTTER_SAMPLE_FIFO_SIZE-1);
  if( iOldestEntry >= nEntries ) // no circular buffer index wrap yet -> oldest entry at index ZERO
   { iOldestEntry = 0;
   }
  return pCwDSP->sPlotterSampleFifo[ iOldestEntry ].dblTimestamp_s;
} // end CwDSP_GetTimestampOfOldestSampleInPlotterFifo()

//---------------------------------------------------------------------------
double CwDSP_GetTimestampOfNewestSampleInPlotterFifo( T_CwDSP *pCwDSP )
  // Can be used to find out WHY CwDSP_ReadFromPlotterFifo() returned NULL
  //     for a given timestamp (the sample may not be in the plotter FIFO
  //     *anymore*, or it may not be in the plotter FIFO *yet*) .
  // Returns the timestamp (in seconds) of the entry immediately before
  //     the FIFO's head index, i.e. the one written most recently.
  // Returns 0.0 if there are fewer than two entries in the FIFO.
{
  int nEntries = pCwDSP->iPlotterSampleFifoUsage;
  int iNewestEntry;

  if( nEntries < 2 )
   { return 0.0;
   }
  iNewestEntry = (pCwDSP->iPlotterSampleFifoHeadIndex - 1 + CWDSP_PLOTTER_SAMPLE_FIFO_SIZE) & (CWDSP_PLOTTER_SAMPLE_FIFO_SIZE-1);
  return pCwDSP->sPlotterSampleFifo[ iNewestEntry ].dblTimestamp_s;
} // end CwDSP_GetTimestampOfNewestSampleInPlotterFifo()


/* EOF < CwDSP.c > */


