diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu b/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu index 2f1cd6c49987a59df0af91e4f4762344cba1f158..2f0e4a3029f5077dce06ee730654fdeb52bc9eb6 100644 --- a/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu +++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu @@ -181,7 +181,9 @@ extern "C" { #if defined DELAY_COMPENSATION DelaysType delaysAtBegin = (DelaysType)delaysAtBeginPtr; DelaysType delaysAfterEnd = (DelaysType)delaysAfterEndPtr; +#if not defined DOPPLER_CORRECTION Phase0sType phase0s = (Phase0sType)phase0sPtr; +#endif /* * Delay compensation means rotating the phase of each sample BACK. @@ -222,8 +224,15 @@ extern "C" { // Calculate the angles to rotate for for the first and (beyond the) last sample. // // We need to undo the delay, so we rotate BACK, resulting in a negative constant factor. +#if defined DOPPLER_CORRECTION + // Since Doppler correction has already been applied at subbandFrequency, + // we only correct the increment for each channel frequency + const double2 phiAtBegin = -2.0 * M_PI * (frequency-subbandFrequency) * delayAtBegin; + const double2 phiAfterEnd = -2.0 * M_PI * (frequency-subbandFrequency) * delayAfterEnd; +#else const double2 phiAtBegin = -2.0 * M_PI * frequency * delayAtBegin - (*phase0s)[delayIdx]; const double2 phiAfterEnd = -2.0 * M_PI * frequency * delayAfterEnd - (*phase0s)[delayIdx]; +#endif #endif for (unsigned time = timeStart; time < NR_SAMPLES_PER_CHANNEL; time += timeInc) diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/FIR_Filter.cu b/RTCP/Cobalt/GPUProc/share/gpu/kernels/FIR_Filter.cu index a86868ed1df2b57bb26acd0f385e639a6d370784..8cdaaace6efbee13df7ae6d926f92c87fa4e4fba 100644 --- a/RTCP/Cobalt/GPUProc/share/gpu/kernels/FIR_Filter.cu +++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/FIR_Filter.cu @@ -98,6 +98,7 @@ typedef float (*FilteredDataType)[NR_STABS][NR_POLARIZATIONS][NR_SAMPLES_PER_CHA typedef const float (*WeightsType)[NR_CHANNELS][NR_TAPS]; #ifdef DOPPLER_CORRECTION +#include<assert.h> // this is faster than doing "pol ? sin(phi) : cos(phi)" // because that statement forces CUDA to still compute both // as GPUs always compute both branches. @@ -112,6 +113,24 @@ inline __device__ float sincos_f2f_select(float phi, int pol) #define NR_SAMPLES_PER_SUBBAND (NR_SAMPLES_PER_CHANNEL * NR_CHANNELS) typedef const double(*DelaysType)[1][NR_STABS][NR_POLARIZATIONS]; // 2 Polarizations; in seconds typedef const double(*Phase0sType)[NR_STABS][NR_POLARIZATIONS]; // 2 Polarizations; in radians + +inline __device__ float correction_factor(unsigned time, double phiAtBegin, double phiAfterEnd, int ri, int pol, unsigned channel) +{ + // Offset of this sample between begin and end. + const double blockOffset = double(time * NR_CHANNELS + channel) / NR_SAMPLES_PER_SUBBAND; + //const double blockOffset = double(channel * NR_SAMPLES_PER_CHANNEL + time) / (double)NR_SAMPLES_PER_SUBBAND; + // Interpolate the required phase rotation for this sample. + // + // Single precision angle + sincos is measured to be good enough (2013-11-20). + // Note that we do the interpolation in double precision still. + // We can afford to if we keep this kernel memory bound. + const float phi = phiAtBegin * (1.0 - blockOffset) + phiAfterEnd * blockOffset; + //if (phi>1e3) { + // printf("%d %d %e %e ",time,channel,blockOffset,phi); + //} + return sincos_f2f_select(-phi, (ri+pol)%2); +} +#define FACTOR(time) correction_factor(time, phiAtBegin, phiAfterEnd, ri, pol, channel) #endif /* DOPPLER_CORRECTION */ /*! @@ -264,26 +283,15 @@ __global__ void FIR_filter( void *filteredDataPtr, for (unsigned time = 0; time < NR_SAMPLES_PER_CHANNEL; time += NR_TAPS) { -#if defined DOPPLER_CORRECTION - // Offset of this sample between begin and end. - const double blockOffset = double(time * NR_SAMPLES_PER_CHANNEL + channel) / NR_SAMPLES_PER_SUBBAND; - // Interpolate the required phase rotation for this sample. - // - // Single precision angle + sincos is measured to be good enough (2013-11-20). - // Note that we do the interpolation in double precision still. - // We can afford to if we keep this kernel memory bound. - const float phi = phiAtBegin * (1.0 - blockOffset) + phiAfterEnd * blockOffset; - const float factor = sincos_f2f_select(phi, pol); -#endif #if defined DOPPLER_CORRECTION - delayLine_sF = convertIntToFloat(SAMPLE(time + 0))*factor; + delayLine_sF = convertIntToFloat(SAMPLE(time + 0))*FACTOR(time); #else delayLine_sF = convertIntToFloat(SAMPLE(time + 0)); #endif sum_s0 = weights_sF * delayLine_s0; #if defined DOPPLER_CORRECTION - delayLine_s0 = convertIntToFloat(SAMPLE(time + 1))*factor; + delayLine_s0 = convertIntToFloat(SAMPLE(time + 1))*FACTOR(time+1); #else delayLine_s0 = convertIntToFloat(SAMPLE(time + 1)); #endif @@ -306,7 +314,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_s1 = weights_sF * delayLine_s1; #if defined DOPPLER_CORRECTION - delayLine_s1 = convertIntToFloat(SAMPLE(time + 2))*factor; + delayLine_s1 = convertIntToFloat(SAMPLE(time + 2))*FACTOR(time+2); #else delayLine_s1 = convertIntToFloat(SAMPLE(time + 2)); #endif @@ -329,7 +337,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_s2 = weights_sF * delayLine_s2; #if defined DOPPLER_CORRECTION - delayLine_s2 = convertIntToFloat(SAMPLE(time + 3))*factor; + delayLine_s2 = convertIntToFloat(SAMPLE(time + 3))*FACTOR(time+3); #else delayLine_s2 = convertIntToFloat(SAMPLE(time + 3)); #endif @@ -352,7 +360,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_s3 = weights_sF * delayLine_s3; #if defined DOPPLER_CORRECTION - delayLine_s3 = convertIntToFloat(SAMPLE(time + 4))*factor; + delayLine_s3 = convertIntToFloat(SAMPLE(time + 4))*FACTOR(time+4); #else delayLine_s3 = convertIntToFloat(SAMPLE(time + 4)); #endif @@ -375,7 +383,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_s4 = weights_sF * delayLine_s4; #if defined DOPPLER_CORRECTION - delayLine_s4 = convertIntToFloat(SAMPLE(time + 5))*factor; + delayLine_s4 = convertIntToFloat(SAMPLE(time + 5))*FACTOR(time+5); #else delayLine_s4 = convertIntToFloat(SAMPLE(time + 5)); #endif @@ -398,7 +406,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_s5 = weights_sF * delayLine_s5; #if defined DOPPLER_CORRECTION - delayLine_s5 = convertIntToFloat(SAMPLE(time + 6))*factor; + delayLine_s5 = convertIntToFloat(SAMPLE(time + 6))*FACTOR(time+6); #else delayLine_s5 = convertIntToFloat(SAMPLE(time + 6)); #endif @@ -421,7 +429,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_s6 = weights_sF * delayLine_s6; #if defined DOPPLER_CORRECTION - delayLine_s6 = convertIntToFloat(SAMPLE(time + 7))*factor; + delayLine_s6 = convertIntToFloat(SAMPLE(time + 7))*FACTOR(time+7); #else delayLine_s6 = convertIntToFloat(SAMPLE(time + 7)); #endif @@ -444,7 +452,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_s7 = weights_sF * delayLine_s7; #if defined DOPPLER_CORRECTION - delayLine_s7 = convertIntToFloat(SAMPLE(time + 8))*factor; + delayLine_s7 = convertIntToFloat(SAMPLE(time + 8))*FACTOR(time+8); #else delayLine_s7 = convertIntToFloat(SAMPLE(time + 8)); #endif @@ -467,7 +475,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_s8 = weights_sF * delayLine_s8; #if defined DOPPLER_CORRECTION - delayLine_s8 = convertIntToFloat(SAMPLE(time + 9))*factor; + delayLine_s8 = convertIntToFloat(SAMPLE(time + 9))*FACTOR(time+9); #else delayLine_s8 = convertIntToFloat(SAMPLE(time + 9)); #endif @@ -490,7 +498,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_s9 = weights_sF * delayLine_s9; #if defined DOPPLER_CORRECTION - delayLine_s9 = convertIntToFloat(SAMPLE(time + 10))*factor; + delayLine_s9 = convertIntToFloat(SAMPLE(time + 10))*FACTOR(time+10); #else delayLine_s9 = convertIntToFloat(SAMPLE(time + 10)); #endif @@ -513,7 +521,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_sA = weights_sF * delayLine_sA; #if defined DOPPLER_CORRECTION - delayLine_sA = convertIntToFloat(SAMPLE(time + 11))*factor; + delayLine_sA = convertIntToFloat(SAMPLE(time + 11))*FACTOR(time+11); #else delayLine_sA = convertIntToFloat(SAMPLE(time + 11)); #endif @@ -536,7 +544,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_sB = weights_sF * delayLine_sB; #if defined DOPPLER_CORRECTION - delayLine_sB = convertIntToFloat(SAMPLE(time + 12))*factor; + delayLine_sB = convertIntToFloat(SAMPLE(time + 12))*FACTOR(time+12); #else delayLine_sB = convertIntToFloat(SAMPLE(time + 12)); #endif @@ -559,7 +567,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_sC = weights_sF * delayLine_sC; #if defined DOPPLER_CORRECTION - delayLine_sC = convertIntToFloat(SAMPLE(time + 13))*factor; + delayLine_sC = convertIntToFloat(SAMPLE(time + 13))*FACTOR(time+13); #else delayLine_sC = convertIntToFloat(SAMPLE(time + 13)); #endif @@ -582,7 +590,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_sD = weights_sF * delayLine_sD; #if defined DOPPLER_CORRECTION - delayLine_sD = convertIntToFloat(SAMPLE(time + 14))*factor; + delayLine_sD = convertIntToFloat(SAMPLE(time + 14))*FACTOR(time+14); #else delayLine_sD = convertIntToFloat(SAMPLE(time + 14)); #endif @@ -605,7 +613,7 @@ __global__ void FIR_filter( void *filteredDataPtr, sum_sE = weights_sF * delayLine_sE; #if defined DOPPLER_CORRECTION - delayLine_sE = convertIntToFloat(SAMPLE(time + 15))*factor; + delayLine_sE = convertIntToFloat(SAMPLE(time + 15))*FACTOR(time+15); #else delayLine_sE = convertIntToFloat(SAMPLE(time + 15)); #endif @@ -649,17 +657,8 @@ __global__ void FIR_filter( void *filteredDataPtr, { #if defined DOPPLER_CORRECTION const unsigned time = NR_SAMPLES_PER_CHANNEL - (NR_TAPS - 1) + tap; - // Offset of this sample between begin and end. - const double blockOffset = double(time * NR_SAMPLES_PER_CHANNEL + channel) / NR_SAMPLES_PER_SUBBAND; - // Interpolate the required phase rotation for this sample. - // - // Single precision angle + sincos is measured to be good enough (2013-11-20). - // Note that we do the interpolation in double precision still. - // We can afford to if we keep this kernel memory bound. - const float phi = phiAtBegin * (1.0 - blockOffset) + phiAfterEnd * blockOffset; - const float factor = sincos_f2f_select(phi, pol); (*historyData)[subbandIdx][station][tap][channel][pol_ri] = // FIXME : subbandIdx=1 assumed here - convertIntToFloat(SAMPLE(time))*factor; + convertIntToFloat(SAMPLE(time))*FACTOR(time); #else (*historyData)[subbandIdx][station][tap][channel][pol_ri] = // FIXME : subbandIdx=1 assumed here SAMPLE(NR_SAMPLES_PER_CHANNEL - (NR_TAPS - 1) + tap);