diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/FIR_Filter.cu b/RTCP/Cobalt/GPUProc/share/gpu/kernels/FIR_Filter.cu
index d4eb2e41c8c7d763486e6325590412a5c8f5a56d..741439c76286e0c50377cca6162561be56d3de31 100644
--- a/RTCP/Cobalt/GPUProc/share/gpu/kernels/FIR_Filter.cu
+++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/FIR_Filter.cu
@@ -119,23 +119,19 @@ inline __device__ float sincos_f2f_select(float phi, int pol)
 typedef  const double(*DelaysType)[1][NR_STABS][NR_POLARIZATIONS]; // 2 Polarizations; in seconds
 typedef  const double(*Phase0sType)[NR_STABS][NR_POLARIZATIONS]; // 2 Polarizations; in radians
 
-inline __device__ float correction_factor(unsigned time, double phiAtBegin, double phiAfterEnd, int ri, int pol, unsigned channel)
+inline __device__ float correction_factor(unsigned time, double phiGradient, int pol, unsigned channel)
 {
-    // Offset of this sample between begin and end.
+    // Offset of this sample between begin and end. = t/T fraction
     const double blockOffset = double(time * NR_CHANNELS + channel) / NR_SAMPLES_PER_SUBBAND;
-    //const double blockOffset = double(channel * NR_SAMPLES_PER_CHANNEL + time) / (double)NR_SAMPLES_PER_SUBBAND;
     // Interpolate the required phase rotation for this sample.
     //
     // Single precision angle + sincos is measured to be good enough (2013-11-20).
     // Note that we do the interpolation in double precision still.
     // We can afford to if we keep this kernel memory bound.
-    const float phi = phiAtBegin  * (1.0 - blockOffset) + phiAfterEnd * blockOffset;
-    //if (phi>1e3) {
-    // printf("%d %d %e %e ",time,channel,blockOffset,phi);
-    //}
-    return sincos_f2f_select(-phi, pol);
+    const float phi = phiGradient  * blockOffset;
+    return sincos_f2f_select(phi, pol);
 }
-#define FACTOR(time) correction_factor(time, phiAtBegin, phiAfterEnd, ri, pol, channel)
+#define FACTOR(time) correction_factor(time, phiGradient, pol, channel)
 #endif /* DOPPLER_CORRECTION */
 
 /*!
@@ -221,8 +217,16 @@ __global__ void FIR_filter( void *filteredDataPtr,
   //
   // We need to undo the delay, so we rotate BACK, resulting in a negative constant factor.
   // Note we use the sample frequency=CLOCK_MHZ/1024 (MHz) to normalize the frequency
-  const double phiAtBegin = -2.0 * M_PI * subbandFrequency * delayAtBegin/(CLOCK_MHZ*1e6/1024.0)- phase0;
-  const double phiAfterEnd = -2.0 * M_PI * subbandFrequency * delayAfterEnd/(CLOCK_MHZ*1e6/1024.0)- phase0;
+  // Let tau_0: delay at begin, tau_1: delay after end 
+  // t : time within a block, T: total time (duration) of block,
+  // So, t in [0,T] and  t/T=blockoffset in [0,1]
+  // then delay at t = tau_1 (t/T) + tau_0 (1 - t/T)
+  // exponent at frqeuency f = -j 2 pi (f tau) 
+  // simplifying, exponent = -j 2 pi f tau_0 - j 2 pi f (tau_1 -tau_0) (t/T)
+  // We only need the term that changes with t, so discard the rest
+  // and only keep : 2 pi f (tau_1 - tau_0) (t/T) for the rest of calculations
+  // also replace f with f/f_s where f_s is sample frequency = clock/1024
+  const double phiGradient = 2.0 * M_PI * (subbandFrequency / (CLOCK_MHZ*1e6/1024.0) )*( delayAfterEnd - delayAtBegin );
 #endif
 
   //# const float16 weights = (*weightsData)[channel];