Version: 0.99-devel (using KDE 3.2 BRANCH >= 20040204, Mandrake Linux Cooker i586 - Cooker) Compiler: gcc version 3.3.2 (Mandrake Linux 10.0 3.3.2-6mdk) OS: Linux (i686) release 2.6.3-7mdk The pass filters use 'gsl_fft_real_transform' to fft the data. From the GSL documentation for this function: "Efficient modules are provided for subtransforms of length 2, 3, 4 and 5. Any remaining factors are computed with a slow, O(n^2), general-n module. " Consequently, for many vector lengths, the transform will become catestrophically slow - eg, NS = 199981 (ie, 10000 frames at 20 samples per frame vs index with 1 sample per frame) has none of the magic factors, so the vector will be transformed with an n^2 algorithm. 200001 samples (10001 frames) is ~9x faster, since it is dividable by 3, but is still very slow! The correct behavior is to use well chosen padding (eg, a linear extrapolation) up to a multiple of 2,3,5....
CVS commit by arwalker: Round up to nearest power of 2 for a faster mixed-radix FFT. CCMAIL: 87447-done@bugs.kde.org M +54 -37 filters.h 1.3 --- kdeextragear-2/kst/plugins/pass_filters/filters.h #1.2:1.3 @@ -38,6 +38,8 @@ int kst_pass_filter( gsl_fft_halfcomplex_wavetable* hc; double* pResult[1]; + double* pPadded; double dFreqValue; int iLengthData; + int iLengthDataPadded; int iReturn = -1; int iStatus; @@ -48,4 +50,10 @@ int kst_pass_filter( if( iLengthData > 0 ) { + // + // round up to the nearest power of 2... + // + iLengthDataPadded = (int)pow( 2.0, ceil( log10( (double)iLengthData ) / log10( 2.0 ) ) ); + pPadded = (double*)malloc( iLengthDataPadded * sizeof( double ) ); + if( pPadded != 0L ) { if( outArrayLens[0] != iLengthData ) { pResult[0] = (double*)realloc( outArrays[0], iLengthData * sizeof( double ) ); @@ -58,14 +66,21 @@ int kst_pass_filter( outArrayLens[0] = iLengthData; - real = gsl_fft_real_wavetable_alloc( iLengthData ); + real = gsl_fft_real_wavetable_alloc( iLengthDataPadded ); if( real != NULL ) { - work = gsl_fft_real_workspace_alloc( iLengthData ); + work = gsl_fft_real_workspace_alloc( iLengthDataPadded ); if( work != NULL ) { - memcpy( pResult[0], inArrays[0], iLengthData * sizeof( double ) ); + memcpy( pPadded, inArrays[0], iLengthData * sizeof( double ) ); + + // + // linear extrapolation on the padded values... + // + for( i=iLengthData; i<iLengthDataPadded; i++ ) { + pPadded[i] = inArrays[0][iLengthData-1] - (double)( i - iLengthData + 1 ) * ( inArrays[0][iLengthData-1] - inArrays[0][0] ) / (double)( iLengthDataPadded - iLengthData ); + } // // calculate the FFT... // - iStatus = gsl_fft_real_transform( pResult[0], 1, iLengthData, real, work ); + iStatus = gsl_fft_real_transform( pPadded, 1, iLengthDataPadded, real, work ); if( !iStatus ) { @@ -73,16 +88,17 @@ int kst_pass_filter( // apply the filter... // - for( i=0; i<iLengthData; i++ ) { - dFreqValue = 0.5 * (double)i / (double)iLengthData; - pResult[0][i] *= filter_calculate( dFreqValue, inScalars ); + for( i=0; i<iLengthDataPadded; i++ ) { + dFreqValue = 0.5 * (double)i / (double)iLengthDataPadded; + pPadded[i] *= filter_calculate( dFreqValue, inScalars ); } - hc = gsl_fft_halfcomplex_wavetable_alloc( iLengthData ); + hc = gsl_fft_halfcomplex_wavetable_alloc( iLengthDataPadded ); if( hc != NULL ) { // // calculate the inverse FFT... // - iStatus = gsl_fft_halfcomplex_inverse( pResult[0], 1, iLengthData, hc, work ); + iStatus = gsl_fft_halfcomplex_inverse( pPadded, 1, iLengthDataPadded, hc, work ); if( !iStatus ) { + memcpy( outArrays[0], pPadded, iLengthData * sizeof( double ) ); iReturn = 0; } @@ -95,5 +111,6 @@ int kst_pass_filter( gsl_fft_real_wavetable_free( real ); } - iReturn = 0; + } + free( pPadded ); } }