diff --git a/.gitattributes b/.gitattributes index 2599f895cd04799b0cabd4b9f5a74e061f738bac..182e9382ca554b43013fdc4bb52a1931a425db4b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3700,13 +3700,9 @@ RTCP/CNProc/test/tPencilBeamFormer.sh -text RTCP/CNProc/test/tStokes.cc -text RTCP/CNProc/test/tStokes.sh -text RTCP/CNProc/test/tStokesAsm.cc -text -RTCP/Cobalt/CMakeLists.txt -text RTCP/Cobalt/CoInterface/test/tParset.parset_obs99275 -text RTCP/Cobalt/CoInterface/test/tParset.sh -text -RTCP/Cobalt/GPUProc/CMakeLists.txt -text -RTCP/Cobalt/GPUProc/src/CMakeLists.txt -text RTCP/Cobalt/GPUProc/src/backward/CL/cl.hpp -text -RTCP/Cobalt/GPUProc/test/CMakeLists.txt -text RTCP/Cobalt/GPUProc/test/DummyStorage.debug -text RTCP/Cobalt/GPUProc/test/cuda/tCudaRuntimeCompiler.run eol=lf RTCP/Cobalt/GPUProc/test/cuda/tCudaRuntimeCompiler.sh eol=lf @@ -3715,41 +3711,43 @@ RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.sh eol=lf RTCP/Cobalt/GPUProc/test/cuda/tFIR_Filter.parset.77_Stations -text RTCP/Cobalt/GPUProc/test/cuda/tFIR_Filter.parset.AARTFAAC -text RTCP/Cobalt/GPUProc/test/cuda/tFIR_Filter.parset.small-test -text -RTCP/Cobalt/GPUProc/test/cuda/tFIR_Filter.run -text -RTCP/Cobalt/GPUProc/test/cuda/tFIR_Filter.sh -text +RTCP/Cobalt/GPUProc/test/cuda/tFIR_Filter.run eol=lf +RTCP/Cobalt/GPUProc/test/cuda/tFIR_Filter.sh eol=lf RTCP/Cobalt/GPUProc/test/cuda/tKernel.sh eol=lf -RTCP/Cobalt/GPUProc/test/cuda/t_cuda_complex.cu -text RTCP/Cobalt/GPUProc/test/cuda/tcreateProgram.run eol=lf RTCP/Cobalt/GPUProc/test/cuda/tcreateProgram.sh eol=lf RTCP/Cobalt/GPUProc/test/opencl/RTCP_UnitTest.parset.77_Stations -text RTCP/Cobalt/GPUProc/test/opencl/RTCP_UnitTest.parset.AARTFAAC -text RTCP/Cobalt/GPUProc/test/opencl/RTCP_UnitTest.parset.small-test -text +RTCP/Cobalt/GPUProc/test/opencl/RTCP_UnitTest.run eol=lf RTCP/Cobalt/GPUProc/test/opencl/cmpfloat.py -text +RTCP/Cobalt/GPUProc/test/opencl/tContext.run eol=lf RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_1sec_1st_5sb_noflagging.output/SB0.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_1sec_1st_5sb_noflagging.output/SB1.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_1sec_1st_5sb_noflagging.output/SB2.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_1sec_1st_5sb_noflagging.output/SB3.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_1sec_1st_5sb_noflagging.output/SB4.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_1sec_1st_5sb_noflagging.parset -text -RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_1sec_1st_5sb_noflagging.run -text -RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_1sec_1st_5sb_noflagging.sh -text +RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_1sec_1st_5sb_noflagging.run eol=lf +RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_1sec_1st_5sb_noflagging.sh eol=lf RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_1st_5sb.output/SB0.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_1st_5sb.output/SB1.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_1st_5sb.output/SB2.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_1st_5sb.output/SB3.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_1st_5sb.output/SB4.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_1st_5sb.parset -text -RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_1st_5sb.run -text -RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_1st_5sb.sh -text +RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_1st_5sb.run eol=lf +RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_1st_5sb.sh eol=lf RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_2st_5sb.output/SB0.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_2st_5sb.output/SB1.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_2st_5sb.output/SB2.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_2st_5sb.output/SB3.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_2st_5sb.output/SB4.MS -text RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_2st_5sb.parset -text -RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_2st_5sb.run -text -RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_2st_5sb.sh -text -RTCP/Cobalt/GPUProc/test/opencl/tCorrelatorWorkQueue.cc -text +RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_2st_5sb.run eol=lf +RTCP/Cobalt/GPUProc/test/opencl/tCorrelate_3sec_2st_5sb.sh eol=lf +RTCP/Cobalt/GPUProc/test/opencl/tGPUPerformance.run eol=lf +RTCP/Cobalt/GPUProc/test/opencl/tPerformanceCounter.run eol=lf RTCP/Cobalt/GPUProc/test/tStorageProcesses.sh -text RTCP/Cobalt/GPUProc/test/tStorageProcesses.stdout -text RTCP/Cobalt/InputProc/CMakeLists.txt -text @@ -3821,7 +3819,6 @@ RTCP/Cobalt/InputProc/test/tSampleBuffer.sh -text RTCP/Cobalt/InputProc/test/tSampleBufferSync.cc -text RTCP/Cobalt/InputProc/test/tSampleBufferSync.sh -text RTCP/Cobalt/InputProc/test/tSharedMemory.sh -text -RTCP/Cobalt/OpenCL_FFT/CMakeLists.txt -text RTCP/Cobalt/OpenCL_FFT/src/AccelerateError.pdf -text RTCP/Cobalt/OpenCL_FFT/src/CMakeLists.txt -text RTCP/Cobalt/OpenCL_FFT/src/Error.pdf -text diff --git a/RTCP/Cobalt/CMakeLists.txt b/RTCP/Cobalt/CMakeLists.txt index d48a7562bcd6ef0d20ab700a80b7b360d41957e6..a246a3a48359537a39e07277a9bb91669124aebf 100644 --- a/RTCP/Cobalt/CMakeLists.txt +++ b/RTCP/Cobalt/CMakeLists.txt @@ -1,4 +1,4 @@ -# $Id: CMakeLists.txt 24129 2013-03-08 14:48:23Z mol $ +# $Id$ lofar_add_package(InputProc) # GPU cluster: Station Input Retrieval and Redistribution applications lofar_add_package(OutputProc) # GPU cluster: Data Storage applications diff --git a/RTCP/Cobalt/GPUProc/CMakeLists.txt b/RTCP/Cobalt/GPUProc/CMakeLists.txt index a090a766bd6a95fb3073453f39b09d9ddad3d9ce..8bda2b33c6b7a58742ffba2cbb86b9b7d32c4434 100644 --- a/RTCP/Cobalt/GPUProc/CMakeLists.txt +++ b/RTCP/Cobalt/GPUProc/CMakeLists.txt @@ -1,4 +1,4 @@ -# $Id: CMakeLists.txt 16350 2010-09-20 13:14:52Z nieuwpoort $ +# $Id$ # Handle options USE_CUDA and USE_OPENCL. if(USE_CUDA AND NOT USE_OPENCL) diff --git a/RTCP/Cobalt/GPUProc/src/CMakeLists.txt b/RTCP/Cobalt/GPUProc/src/CMakeLists.txt index 43d6cff7cd96c0ec0764773f5e092684e747480f..775e97227f49819c160607baab2b01af460120a3 100644 --- a/RTCP/Cobalt/GPUProc/src/CMakeLists.txt +++ b/RTCP/Cobalt/GPUProc/src/CMakeLists.txt @@ -1,4 +1,4 @@ -# $Id: CMakeLists.txt 17003 2011-01-06 08:54:59Z romein $ +# $Id$ include(LofarPackageVersion) diff --git a/RTCP/Cobalt/GPUProc/test/CMakeLists.txt b/RTCP/Cobalt/GPUProc/test/CMakeLists.txt index 71a5529df9669e008bf3aae23f683d047c54985f..73d711f37595a1a009054a99e862ae7e17711bff 100644 --- a/RTCP/Cobalt/GPUProc/test/CMakeLists.txt +++ b/RTCP/Cobalt/GPUProc/test/CMakeLists.txt @@ -1,4 +1,4 @@ -# $Id: CMakeLists.txt 13414 2009-06-16 22:15:37Z loose $ +# $Id$ include(LofarCTest) diff --git a/RTCP/Cobalt/GPUProc/test/cuda/t_cuda_complex.cu b/RTCP/Cobalt/GPUProc/test/cuda/t_cuda_complex.cu index 4480303e9ad42a7e717922a16cb46928984629b7..81f7ddb2279a5e2f22d7fa7fdd3d290100db8de4 100644 --- a/RTCP/Cobalt/GPUProc/test/cuda/t_cuda_complex.cu +++ b/RTCP/Cobalt/GPUProc/test/cuda/t_cuda_complex.cu @@ -1,164 +1,164 @@ -//# t_cuda_complex.cu -//# Copyright (C) 2013 ASTRON (Netherlands Institute for Radio Astronomy) -//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands -//# -//# This file is part of the LOFAR software suite. -//# The LOFAR software suite is free software: you can redistribute it and/or -//# modify it under the terms of the GNU General Public License as published -//# by the Free Software Foundation, either version 3 of the License, or -//# (at your option) any later version. -//# -//# The LOFAR software suite is distributed in the hope that it will be useful, -//# but WITHOUT ANY WARRANTY; without even the implied warranty of -//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -//# GNU General Public License for more details. -//# -//# You should have received a copy of the GNU General Public License along -//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. -//# -//# $Id$ - -#include <lofar_config.h> - -#include "cuda_runtime.h" -#include "device_launch_parameters.h" - -#include <stdio.h> -#include <iostream> -#include <complex> - -#include <GPUProc/complex.h> - -cudaError_t addWithCuda( std::complex<float> * output_complex, const std::complex<float> * input_complex, size_t size); - -__global__ void addKernel( void *in_ptr, const void *out_ptr) -{ - int i = threadIdx.x; - // Cast to complex - - LOFAR::Cobalt::gpu::complex<float>*in = (LOFAR::Cobalt::gpu::complex<float>*) in_ptr; - LOFAR::Cobalt::gpu::complex<float>*out = (LOFAR::Cobalt::gpu::complex<float>*) out_ptr; - - //do some computations, We are not testing the correctness of the implementation here. - out[i] = in[i] + in[i]; - out[i] -= in[i]; - out[i] = out[i]; - out[i] *= 10.0; -} - -using namespace std; -int main() -{ - const int arraySize = 5; - // insert some values - const complex<float> complex_in[5] = { complex<float>(1.0,1.0), - complex<float>(1,-1), - complex<float>(-1,1), - complex<float>(-1,-1), - complex<float>(4,-4)}; - complex<float> complex_out[5] = { 0 }; - - // Add vectors in parallel. - cudaError_t cudaStatus = addWithCuda(complex_out, complex_in,arraySize); - if (cudaStatus == cudaErrorNoDevice) { - return 3; - } - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "addWithCuda failed!"); - return 1; - } - - const complex<float> complex_target[5] = {complex<float>(10,10), - complex<float>(10, -10), - complex<float>(-10,10), - complex<float>(-10,-10), - complex<float>(40,-40)}; - - - // validate that the output of the kernel is correct! - if (complex_out[0] == complex_target[0] && - complex_out[1] == complex_target[1] && - complex_out[2] == complex_target[2] && - complex_out[3] == complex_target[3] && - complex_out[4] == complex_target[4] - ) - { - return 0; - } - else //print the output data and return -1 - { - cout << "The complex values returned from the device were incorrect:" << endl; - cout << "complex numbers, expected - received: {"; - for (int idx =0; idx < 5 ;++idx) - { - cout << complex_target[idx] << " - " << complex_out[idx] ; - if (complex_target[idx] != complex_out[idx]) - cout << "<<<"; - cout << endl; - } - cout << " }" << endl; - return -1; - } -} - - -// Helper function for using CUDA to add vectors in parallel. -cudaError_t addWithCuda(std::complex<float>* output_complex, - const std::complex<float>* input_complex, - size_t size) -{ - std::complex<float> *dev_in = 0; - std::complex<float> *dev_out = 0; - cudaError_t cudaStatus; - - // Choose which GPU to run on, change this on a multi-GPU system. - cudaStatus = cudaSetDevice(0); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?\n"); - goto Error; - } - - // allocate the complex buffers - cudaStatus = cudaMalloc((void**)&dev_in, size * sizeof(std::complex<float>)); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaMalloc failed!"); - goto Error; - } - - cudaStatus = cudaMalloc((void**)&dev_out, size * sizeof(std::complex<float>)); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaMalloc failed!"); - goto Error; - } - - cudaStatus = cudaMemcpy(dev_in, input_complex, size * sizeof(std::complex<float>), cudaMemcpyHostToDevice); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaMemcpy failed!"); - goto Error; - } - - // Launch a kernel on the GPU with one thread for each element. - addKernel<<<1, size>>>((void *) dev_in, (const void *) dev_out); - - // cudaDeviceSynchronize waits for the kernel to finish, and returns - // any errors encountered during the launch. - cudaStatus = cudaDeviceSynchronize(); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus); - goto Error; - } - - cudaStatus = cudaMemcpy(output_complex, dev_out, size * sizeof(std::complex<float>), cudaMemcpyDeviceToHost); - if (cudaStatus != cudaSuccess) { - fprintf(stderr, "cudaMemcpy failed!"); - goto Error; - } - - -Error: - cudaFree(dev_in); - cudaFree(dev_out); - - return cudaStatus; -} - +//# t_cuda_complex.cu +//# Copyright (C) 2013 ASTRON (Netherlands Institute for Radio Astronomy) +//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +//# +//# This file is part of the LOFAR software suite. +//# The LOFAR software suite is free software: you can redistribute it and/or +//# modify it under the terms of the GNU General Public License as published +//# by the Free Software Foundation, either version 3 of the License, or +//# (at your option) any later version. +//# +//# The LOFAR software suite is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License along +//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. +//# +//# $Id$ + +#include <lofar_config.h> + +#include "cuda_runtime.h" +#include "device_launch_parameters.h" + +#include <stdio.h> +#include <iostream> +#include <complex> + +#include <GPUProc/complex.h> + +cudaError_t addWithCuda( std::complex<float> * output_complex, const std::complex<float> * input_complex, size_t size); + +__global__ void addKernel( void *in_ptr, const void *out_ptr) +{ + int i = threadIdx.x; + // Cast to complex + + LOFAR::Cobalt::gpu::complex<float>*in = (LOFAR::Cobalt::gpu::complex<float>*) in_ptr; + LOFAR::Cobalt::gpu::complex<float>*out = (LOFAR::Cobalt::gpu::complex<float>*) out_ptr; + + //do some computations, We are not testing the correctness of the implementation here. + out[i] = in[i] + in[i]; + out[i] -= in[i]; + out[i] = out[i]; + out[i] *= 10.0; +} + +using namespace std; +int main() +{ + const int arraySize = 5; + // insert some values + const complex<float> complex_in[5] = { complex<float>(1.0,1.0), + complex<float>(1,-1), + complex<float>(-1,1), + complex<float>(-1,-1), + complex<float>(4,-4)}; + complex<float> complex_out[5] = { 0 }; + + // Add vectors in parallel. + cudaError_t cudaStatus = addWithCuda(complex_out, complex_in,arraySize); + if (cudaStatus == cudaErrorNoDevice) { + return 3; + } + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "addWithCuda failed!"); + return 1; + } + + const complex<float> complex_target[5] = {complex<float>(10,10), + complex<float>(10, -10), + complex<float>(-10,10), + complex<float>(-10,-10), + complex<float>(40,-40)}; + + + // validate that the output of the kernel is correct! + if (complex_out[0] == complex_target[0] && + complex_out[1] == complex_target[1] && + complex_out[2] == complex_target[2] && + complex_out[3] == complex_target[3] && + complex_out[4] == complex_target[4] + ) + { + return 0; + } + else //print the output data and return -1 + { + cout << "The complex values returned from the device were incorrect:" << endl; + cout << "complex numbers, expected - received: {"; + for (int idx =0; idx < 5 ;++idx) + { + cout << complex_target[idx] << " - " << complex_out[idx] ; + if (complex_target[idx] != complex_out[idx]) + cout << "<<<"; + cout << endl; + } + cout << " }" << endl; + return -1; + } +} + + +// Helper function for using CUDA to add vectors in parallel. +cudaError_t addWithCuda(std::complex<float>* output_complex, + const std::complex<float>* input_complex, + size_t size) +{ + std::complex<float> *dev_in = 0; + std::complex<float> *dev_out = 0; + cudaError_t cudaStatus; + + // Choose which GPU to run on, change this on a multi-GPU system. + cudaStatus = cudaSetDevice(0); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaSetDevice failed! Do you have a CUDA-capable GPU installed?\n"); + goto Error; + } + + // allocate the complex buffers + cudaStatus = cudaMalloc((void**)&dev_in, size * sizeof(std::complex<float>)); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMalloc failed!"); + goto Error; + } + + cudaStatus = cudaMalloc((void**)&dev_out, size * sizeof(std::complex<float>)); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMalloc failed!"); + goto Error; + } + + cudaStatus = cudaMemcpy(dev_in, input_complex, size * sizeof(std::complex<float>), cudaMemcpyHostToDevice); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMemcpy failed!"); + goto Error; + } + + // Launch a kernel on the GPU with one thread for each element. + addKernel<<<1, size>>>((void *) dev_in, (const void *) dev_out); + + // cudaDeviceSynchronize waits for the kernel to finish, and returns + // any errors encountered during the launch. + cudaStatus = cudaDeviceSynchronize(); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaDeviceSynchronize returned error code %d after launching addKernel!\n", cudaStatus); + goto Error; + } + + cudaStatus = cudaMemcpy(output_complex, dev_out, size * sizeof(std::complex<float>), cudaMemcpyDeviceToHost); + if (cudaStatus != cudaSuccess) { + fprintf(stderr, "cudaMemcpy failed!"); + goto Error; + } + + +Error: + cudaFree(dev_in); + cudaFree(dev_out); + + return cudaStatus; +} + diff --git a/RTCP/Cobalt/OpenCL_FFT/CMakeLists.txt b/RTCP/Cobalt/OpenCL_FFT/CMakeLists.txt index 4151ebacd6ead5d917acba99e83ede5642803445..7cce981f91c6fc4d9350e1f109a7a7f5fbe7fd26 100644 --- a/RTCP/Cobalt/OpenCL_FFT/CMakeLists.txt +++ b/RTCP/Cobalt/OpenCL_FFT/CMakeLists.txt @@ -1,4 +1,4 @@ -# $Id: CMakeLists.txt 17975 2011-05-10 09:52:51Z mol $ +# $Id$ lofar_package(OpenCL_FFT 1.0)